PROJECT TOPIC - CREDIT CARD FRAUD DETECTION

#1. Data Description: Time- Number of seconds elapsed between this transaction and the first transaction in the dataset V1-V28 - Result of a PCA Dimensionality reduction to protect user identities and sensitive features(v1-v28) Amount - Transaction amount Class - 1 for fraudulent transactions, 0 otherwise

#Goal

To identify fraudulent transactions which can result in significant financial losses for individuals and businesses. The goal is to develop a machine learning model that can accurately classify transactions as either fraudulent or legitimate, based on various features such as transaction amount, location, and time.

#Dataset Reference:

The Credit Card Fraud Detection dataset on Kaggle at the following link https://www.kaggle.com/mlg-ulb/creditcardfraud Links to an external site. The dataset has 31 variables, with the target variable indicating whether a transaction is fraudulent or not.

#Model:

I will be using a classification model to solve the problem of credit card fraud detection. The outcome variable will be a binary variable indicating whether a credit card transaction is fraudulent or not.

library(tidyverse)
library(caret)
library(ggplot2)
library(lattice)
library(gridExtra)
library(corrplot)
library(ROCR)

#1 Loading the dataset:

credit_card <- read.csv("/Users/vinoth/Downloads/creditcard.csv")
credit_card$Class = factor(credit_card$Class,levels = c(0,1))

Getting the summary of structure of dataset

head(credit_card)
dim(credit_card)
[1] 284807     31
str(credit_card)
'data.frame':   284807 obs. of  31 variables:
 $ Time  : num  0 0 1 1 2 2 4 7 7 9 ...
 $ V1    : num  -1.36 1.192 -1.358 -0.966 -1.158 ...
 $ V2    : num  -0.0728 0.2662 -1.3402 -0.1852 0.8777 ...
 $ V3    : num  2.536 0.166 1.773 1.793 1.549 ...
 $ V4    : num  1.378 0.448 0.38 -0.863 0.403 ...
 $ V5    : num  -0.3383 0.06 -0.5032 -0.0103 -0.4072 ...
 $ V6    : num  0.4624 -0.0824 1.8005 1.2472 0.0959 ...
 $ V7    : num  0.2396 -0.0788 0.7915 0.2376 0.5929 ...
 $ V8    : num  0.0987 0.0851 0.2477 0.3774 -0.2705 ...
 $ V9    : num  0.364 -0.255 -1.515 -1.387 0.818 ...
 $ V10   : num  0.0908 -0.167 0.2076 -0.055 0.7531 ...
 $ V11   : num  -0.552 1.613 0.625 -0.226 -0.823 ...
 $ V12   : num  -0.6178 1.0652 0.0661 0.1782 0.5382 ...
 $ V13   : num  -0.991 0.489 0.717 0.508 1.346 ...
 $ V14   : num  -0.311 -0.144 -0.166 -0.288 -1.12 ...
 $ V15   : num  1.468 0.636 2.346 -0.631 0.175 ...
 $ V16   : num  -0.47 0.464 -2.89 -1.06 -0.451 ...
 $ V17   : num  0.208 -0.115 1.11 -0.684 -0.237 ...
 $ V18   : num  0.0258 -0.1834 -0.1214 1.9658 -0.0382 ...
 $ V19   : num  0.404 -0.146 -2.262 -1.233 0.803 ...
 $ V20   : num  0.2514 -0.0691 0.525 -0.208 0.4085 ...
 $ V21   : num  -0.01831 -0.22578 0.248 -0.1083 -0.00943 ...
 $ V22   : num  0.27784 -0.63867 0.77168 0.00527 0.79828 ...
 $ V23   : num  -0.11 0.101 0.909 -0.19 -0.137 ...
 $ V24   : num  0.0669 -0.3398 -0.6893 -1.1756 0.1413 ...
 $ V25   : num  0.129 0.167 -0.328 0.647 -0.206 ...
 $ V26   : num  -0.189 0.126 -0.139 -0.222 0.502 ...
 $ V27   : num  0.13356 -0.00898 -0.05535 0.06272 0.21942 ...
 $ V28   : num  -0.0211 0.0147 -0.0598 0.0615 0.2152 ...
 $ Amount: num  149.62 2.69 378.66 123.5 69.99 ...
 $ Class : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
summary(credit_card)
      Time              V1                  V2                  V3                 V4                 V5            
 Min.   :     0   Min.   :-56.40751   Min.   :-72.71573   Min.   :-48.3256   Min.   :-5.68317   Min.   :-113.74331  
 1st Qu.: 54202   1st Qu.: -0.92037   1st Qu.: -0.59855   1st Qu.: -0.8904   1st Qu.:-0.84864   1st Qu.:  -0.69160  
 Median : 84692   Median :  0.01811   Median :  0.06549   Median :  0.1799   Median :-0.01985   Median :  -0.05434  
 Mean   : 94814   Mean   :  0.00000   Mean   :  0.00000   Mean   :  0.0000   Mean   : 0.00000   Mean   :   0.00000  
 3rd Qu.:139320   3rd Qu.:  1.31564   3rd Qu.:  0.80372   3rd Qu.:  1.0272   3rd Qu.: 0.74334   3rd Qu.:   0.61193  
 Max.   :172792   Max.   :  2.45493   Max.   : 22.05773   Max.   :  9.3826   Max.   :16.87534   Max.   :  34.80167  
       V6                 V7                 V8                  V9                 V10                 V11          
 Min.   :-26.1605   Min.   :-43.5572   Min.   :-73.21672   Min.   :-13.43407   Min.   :-24.58826   Min.   :-4.79747  
 1st Qu.: -0.7683   1st Qu.: -0.5541   1st Qu.: -0.20863   1st Qu.: -0.64310   1st Qu.: -0.53543   1st Qu.:-0.76249  
 Median : -0.2742   Median :  0.0401   Median :  0.02236   Median : -0.05143   Median : -0.09292   Median :-0.03276  
 Mean   :  0.0000   Mean   :  0.0000   Mean   :  0.00000   Mean   :  0.00000   Mean   :  0.00000   Mean   : 0.00000  
 3rd Qu.:  0.3986   3rd Qu.:  0.5704   3rd Qu.:  0.32735   3rd Qu.:  0.59714   3rd Qu.:  0.45392   3rd Qu.: 0.73959  
 Max.   : 73.3016   Max.   :120.5895   Max.   : 20.00721   Max.   : 15.59500   Max.   : 23.74514   Max.   :12.01891  
      V12                V13                V14                V15                V16                 V17           
 Min.   :-18.6837   Min.   :-5.79188   Min.   :-19.2143   Min.   :-4.49894   Min.   :-14.12985   Min.   :-25.16280  
 1st Qu.: -0.4056   1st Qu.:-0.64854   1st Qu.: -0.4256   1st Qu.:-0.58288   1st Qu.: -0.46804   1st Qu.: -0.48375  
 Median :  0.1400   Median :-0.01357   Median :  0.0506   Median : 0.04807   Median :  0.06641   Median : -0.06568  
 Mean   :  0.0000   Mean   : 0.00000   Mean   :  0.0000   Mean   : 0.00000   Mean   :  0.00000   Mean   :  0.00000  
 3rd Qu.:  0.6182   3rd Qu.: 0.66251   3rd Qu.:  0.4931   3rd Qu.: 0.64882   3rd Qu.:  0.52330   3rd Qu.:  0.39968  
 Max.   :  7.8484   Max.   : 7.12688   Max.   : 10.5268   Max.   : 8.87774   Max.   : 17.31511   Max.   :  9.25353  
      V18                 V19                 V20                 V21                 V22            
 Min.   :-9.498746   Min.   :-7.213527   Min.   :-54.49772   Min.   :-34.83038   Min.   :-10.933144  
 1st Qu.:-0.498850   1st Qu.:-0.456299   1st Qu.: -0.21172   1st Qu.: -0.22839   1st Qu.: -0.542350  
 Median :-0.003636   Median : 0.003735   Median : -0.06248   Median : -0.02945   Median :  0.006782  
 Mean   : 0.000000   Mean   : 0.000000   Mean   :  0.00000   Mean   :  0.00000   Mean   :  0.000000  
 3rd Qu.: 0.500807   3rd Qu.: 0.458949   3rd Qu.:  0.13304   3rd Qu.:  0.18638   3rd Qu.:  0.528554  
 Max.   : 5.041069   Max.   : 5.591971   Max.   : 39.42090   Max.   : 27.20284   Max.   : 10.503090  
      V23                 V24                V25                 V26                V27            
 Min.   :-44.80774   Min.   :-2.83663   Min.   :-10.29540   Min.   :-2.60455   Min.   :-22.565679  
 1st Qu.: -0.16185   1st Qu.:-0.35459   1st Qu.: -0.31715   1st Qu.:-0.32698   1st Qu.: -0.070840  
 Median : -0.01119   Median : 0.04098   Median :  0.01659   Median :-0.05214   Median :  0.001342  
 Mean   :  0.00000   Mean   : 0.00000   Mean   :  0.00000   Mean   : 0.00000   Mean   :  0.000000  
 3rd Qu.:  0.14764   3rd Qu.: 0.43953   3rd Qu.:  0.35072   3rd Qu.: 0.24095   3rd Qu.:  0.091045  
 Max.   : 22.52841   Max.   : 4.58455   Max.   :  7.51959   Max.   : 3.51735   Max.   : 31.612198  
      V28                Amount         Class     
 Min.   :-15.43008   Min.   :    0.00   0:284315  
 1st Qu.: -0.05296   1st Qu.:    5.60   1:   492  
 Median :  0.01124   Median :   22.00             
 Mean   :  0.00000   Mean   :   88.35             
 3rd Qu.:  0.07828   3rd Qu.:   77.17             
 Max.   : 33.84781   Max.   :25691.16             

Using duplicated function to duplicated elements.

duplicated(credit_card)
   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [19] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE
  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [55] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  [91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [109] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [127] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [163] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [199] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [217] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [235] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [271] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [289] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [307] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [325] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [343] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [361] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [379] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [397] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [415] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [433] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [451] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [469] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [487] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [505] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [523] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [541] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [559] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [577] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [595] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [613] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [631] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [649] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [667] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [685] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [703] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [721] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [739] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [757] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [775] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [793] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [811] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [829] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [847] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [865] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [883] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [901] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [919] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [937] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [955] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [973] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [991] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [ reached getOption("max.print") -- omitted 283807 entries ]

#Number of missing values in realtor dataset.

sum(is.na(credit_card))
[1] 0

There are no missing values in the dataset.

#Class Distribution:

table(credit_card$Class)

     0      1 
284315    492 
barplot(table(credit_card$Class))

#Time and Amount Analysis:

hist(credit_card$Time)

hist(credit_card$Amount)

boxplot(Amount ~ Class, data = credit_card, xlab = "Class", ylab = "Amount")

plot(credit_card$V1, credit_card$V2, col = ifelse(credit_card$Class == 1, "red", "blue"),xlab = "V1", ylab = "V2")

#Finding the Correlation of every feature vs Class Feature.

correlation <- cor(credit_card[,-31], as.numeric(credit_card[,31]))
correlation
                [,1]
Time   -0.0123225709
V1     -0.1013472986
V2      0.0912886503
V3     -0.1929608271
V4      0.1334474862
V5     -0.0949742990
V6     -0.0436431607
V7     -0.1872565915
V8      0.0198751239
V9     -0.0977326861
V10    -0.2168829436
V11     0.1548756447
V12    -0.2605929249
V13    -0.0045697788
V14    -0.3025436958
V15    -0.0042234023
V16    -0.1965389403
V17    -0.3264810672
V18    -0.1114852539
V19     0.0347830130
V20     0.0200903242
V21     0.0404133806
V22     0.0008053175
V23    -0.0026851557
V24    -0.0072209067
V25     0.0033077056
V26     0.0044553975
V27     0.0175797282
V28     0.0095360409
Amount  0.0056317530

The columns labeled as v1, v3, v10, v12, v14, v16, v17, and v18 exhibit a negative correlation, indicating a strong inverse relationship. Conversely, the columns v2, v4, and v11 demonstrate a positive correlation, indicating a direct relationship. The remaining columns do not display a significant correlation with the class variable, suggesting that they are not strongly associated with it.

corrplot(correlation, method = "pie", cl.pos='n')

On the Above observation we can conclude that:

Variables with Strong Negative Correlations: V10 (-0.2168829436) V12 (-0.2605929249) V14 (-0.3025436958) V17 (-0.3264810672) Variables with Moderate Negative Correlations: V3 (-0.1929608271) V7 (-0.1872565915) V16 (-0.1965389403) Variables with Moderate Positive Correlations: V4 (0.1334474862)

#removing less correlated variables.

credit_card <- select(credit_card, V3, V4, V7, V10, V12, V14, V16, V17, Class)
credit_card
summary(credit_card)
       V3                 V4                 V7                V10                 V12                V14          
 Min.   :-48.3256   Min.   :-5.68317   Min.   :-43.5572   Min.   :-24.58826   Min.   :-18.6837   Min.   :-19.2143  
 1st Qu.: -0.8904   1st Qu.:-0.84864   1st Qu.: -0.5541   1st Qu.: -0.53543   1st Qu.: -0.4056   1st Qu.: -0.4256  
 Median :  0.1799   Median :-0.01985   Median :  0.0401   Median : -0.09292   Median :  0.1400   Median :  0.0506  
 Mean   :  0.0000   Mean   : 0.00000   Mean   :  0.0000   Mean   :  0.00000   Mean   :  0.0000   Mean   :  0.0000  
 3rd Qu.:  1.0272   3rd Qu.: 0.74334   3rd Qu.:  0.5704   3rd Qu.:  0.45392   3rd Qu.:  0.6182   3rd Qu.:  0.4931  
 Max.   :  9.3826   Max.   :16.87534   Max.   :120.5895   Max.   : 23.74514   Max.   :  7.8484   Max.   : 10.5268  
      V16                 V17            Class     
 Min.   :-14.12985   Min.   :-25.16280   0:284315  
 1st Qu.: -0.46804   1st Qu.: -0.48375   1:   492  
 Median :  0.06641   Median : -0.06568             
 Mean   :  0.00000   Mean   :  0.00000             
 3rd Qu.:  0.52330   3rd Qu.:  0.39968             
 Max.   : 17.31511   Max.   :  9.25353             

#Box Plot, Density plot for all Feature vs Class

boxplot_plots <- list()
density_plots <- list()

for (var in c("V3", "V4", "V7", "V10", "V12", "V14", "V16", "V17")) {

  boxplot_plot <- ggplot(credit_card, aes(x = factor(Class), y = .data[[var]], fill = factor(Class))) +
    geom_boxplot() +
    labs(title = paste("Boxplot of", var, "by Class")) +
    theme_minimal()

    boxplot_plots <- append(boxplot_plots, list(boxplot_plot))

  density_plot <- ggplot(credit_card, aes(x = .data[[var]], fill = factor(Class))) +
    geom_density(alpha = 0.7) +
    labs(title = paste("Density Plot of", var, "by Class")) +
    theme_minimal()
  
   density_plots <- append(density_plots, list(density_plot))
}

for (plot in boxplot_plots) {
  print(plot)
}


for (plot in density_plots) {
  print(plot)
}

#t-test
for (var in c("V3", "V4", "V7", "V10", "V12", "V14", "V16", "V17")) {
  class_0 <- credit_card[[var]][credit_card$Class == 0]
  class_1 <- credit_card[[var]][credit_card$Class == 1]
  t_test <- t.test(class_0, class_1)
  cat(paste("Variable:", var, "\n"))
  cat(paste("t-test p-value:", t_test$p.value, "\n\n"))
}
Variable: V3 
t-test p-value: 4.78608140742888e-75 

Variable: V4 
t-test p-value: 4.58731692138804e-136 

Variable: V7 
t-test p-value: 4.29223131944152e-52 

Variable: V10 
t-test p-value: 3.38288365061388e-93 

Variable: V12 
t-test p-value: 1.48131650283816e-112 

Variable: V14 
t-test p-value: 1.0401507099026e-140 

Variable: V16 
t-test p-value: 7.90532563943685e-84 

Variable: V17 
t-test p-value: 1.55809380578438e-71 

#Under Sampling and Over Sampling of data

credit_card_0 <- credit_card[credit_card$Class == 0, ]
credit_card_1 <- credit_card[credit_card$Class == 1, ]

#Undersampling
credit_card_0_down <- credit_card_0[sample(nrow(credit_card_0), 50000), ]

# Oversampling
credit_card_1_over <- credit_card_1[sample(nrow(credit_card_1), 50000, replace = TRUE), ]

#Combining
credit_card <- rbind(credit_card_0_down, credit_card_1_over)

credit_card
dim(credit_card)
[1] 100000      9
summary(credit_card)
       V3                 V4                V7                V10                 V12                V14          
 Min.   :-33.6810   Min.   :-5.6006   Min.   :-43.5572   Min.   :-24.58826   Min.   :-18.6837   Min.   :-19.2143  
 1st Qu.: -5.1263   1st Qu.:-0.1086   1st Qu.: -3.1046   1st Qu.: -4.61451   1st Qu.: -5.5837   1st Qu.: -6.7633  
 Median : -1.3711   Median : 1.3096   Median : -0.6451   Median : -0.86790   Median : -0.8357   Median : -1.0225  
 Mean   : -3.5298   Mean   : 2.2596   Mean   : -2.7760   Mean   : -2.82971   Mean   : -3.1292   Mean   : -3.4857  
 3rd Qu.:  0.3164   3rd Qu.: 4.2332   3rd Qu.:  0.2716   3rd Qu.:  0.02872   3rd Qu.:  0.2048   3rd Qu.:  0.1219  
 Max.   :  4.1017   Max.   :12.6728   Max.   : 44.0545   Max.   : 15.24569   Max.   :  4.8465   Max.   :  7.6677  
      V16                V17           Class    
 Min.   :-14.1299   Min.   :-25.1628   0:50000  
 1st Qu.: -3.5632   1st Qu.: -5.3565   1:50000  
 Median : -0.6422   Median : -0.5275            
 Mean   : -2.0729   Mean   : -3.3316            
 3rd Qu.:  0.3001   3rd Qu.:  0.2595            
 Max.   :  7.0591   Max.   :  7.8934            

#Radomizing the data before spliting

credit_card <- credit_card[sample(nrow(credit_card)), ]
credit_card

#Spliting the data into training and testing sets

levels(credit_card$Class)=list(No ="0", Yes="1")
set.seed(123)
train_indices <- createDataPartition(credit_card$Class, p = 0.7, list = FALSE)
train_data <- credit_card[train_indices, ]
test_data <- credit_card[-train_indices, ]
# Check the dimensions of the train and test sets
dim(train_data)
[1] 70000     9
dim(test_data)
[1] 30000     9
str(train_data)
'data.frame':   70000 obs. of  9 variables:
 $ V3   : num  -9.427 0.71 -0.304 -8.533 -12.788 ...
 $ V4   : num  6.58 1.27 2.76 8.34 8.79 ...
 $ V7   : num  -10.8941 -0.605 -0.0428 -10.9892 -14.2488 ...
 $ V10  : num  -12.982 -0.273 -0.158 -11.436 -13.074 ...
 $ V12  : num  -14.154 1.78 -0.936 -14.296 -12.375 ...
 $ V14  : num  -16.338 -0.838 -1.633 -15.445 -11.471 ...
 $ V16  : num  -12.375 -0.605 1.426 -12.391 -10.974 ...
 $ V17  : num  -21.0906 -0.0303 1.5935 -22.5417 -21.0476 ...
 $ Class: Factor w/ 2 levels "No","Yes": 2 1 2 2 2 2 1 1 2 2 ...
str(test_data)
'data.frame':   30000 obs. of  9 variables:
 $ V3   : num  1.2821 -3.7318 -3.733 -17.8801 -0.0958 ...
 $ V4   : num  -1.158 6.679 1.415 9.249 -0.559 ...
 $ V7   : num  -0.0854 -8.7481 -0.3138 -18.0147 0.6889 ...
 $ V10  : num  -0.923 -5.666 -1.331 -14.557 -0.647 ...
 $ V12  : num  0.381 -6.147 0.299 -10.38 0.432 ...
 $ V14  : num  0.497 -4.329 -2.585 -6.918 0.367 ...
 $ V16  : num  0.5201 -1.839 1.5142 -10.3282 0.0338 ...
 $ V17  : num  -0.97 -3.35 2.82 -20.25 -0.28 ...
 $ Class: Factor w/ 2 levels "No","Yes": 1 2 1 2 1 1 2 1 2 2 ...

Decision tree using c50.

library(C50)

model_boosted <- C5.0(Class ~ ., data = train_data, trials = 30)

pred_boosted <- predict(model_boosted, newdata = test_data, type = "class")

confusion_matrix_boosted <- table(test_data$Class, pred_boosted)

confusion_matrix_boosted
     pred_boosted
         No   Yes
  No  14993     7
  Yes     0 15000
accuracy_boosted <- sum(diag(confusion_matrix_boosted)) / sum(confusion_matrix_boosted)

total_error_boosted <- 1 - accuracy_boosted

precision_boosted <- diag(confusion_matrix_boosted) / colSums(confusion_matrix_boosted)
recall_boosted <- diag(confusion_matrix_boosted) / rowSums(confusion_matrix_boosted)

precision_recall_boosted <- data.frame(Precision = precision_boosted, Recall = recall_boosted)

precision_recall_total_boosted <- rbind(precision_recall_boosted, c(mean(precision_boosted), mean(recall_boosted)))

precision_recall_total_boosted$Labels <- c(levels(test_data$Class), "Average")

print(confusion_matrix_boosted)
     pred_boosted
         No   Yes
  No  14993     7
  Yes     0 15000
print(accuracy_boosted)
[1] 0.9997667
print(paste0("Total Error: ", round(total_error_boosted, 4)))
[1] "Total Error: 2e-04"
print(precision_recall_total_boosted)

#Desicion Tree using rpart

credit_data_model <- train(Class ~ ., data = train_data, method = "rpart", trControl = trainControl(method = "cv", number = 5))
credit_data_pred <- predict(credit_data_model, test_data)
credit_data_cm <- confusionMatrix(credit_data_pred, test_data$Class)
credit_data_auc <- credit_data_cm$overall[1] *100
credit_data_auc
Accuracy 
   93.51 
credit_data_acc <- credit_data_cm$overall[4] *100
credit_data_acc
AccuracyUpper 
     93.78621 
cat("Accuracy of Decision Tree Model" ,credit_data_acc,". Summary of ROC Curve ",credit_data_auc)
Accuracy of Decision Tree Model 93.78621 . Summary of ROC Curve  93.51
credit_data_model
CART 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 56000, 56000, 56000, 56000, 56000 
Resampling results across tuning parameters:

  cp           Accuracy   Kappa    
  0.006857143  0.9350143  0.8700286
  0.012928571  0.9256143  0.8512286
  0.842342857  0.7518143  0.5036286

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was cp = 0.006857143.

#Knn

set.seed(1)
knn_credit_card<- train(Class ~ ., data = train_data, trControl = trainControl(method="cv", number=10, classProbs = TRUE, summaryFunction = twoClassSummary) ,method = "knn",metric ="ROC")
knn_credit_card
k-Nearest Neighbors 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 63000, 63000, 63000, 63000, 63000, 63000, ... 
Resampling results across tuning parameters:

  k  ROC        Sens       Spec
  5  0.9991857  0.9920571  1   
  7  0.9991857  0.9896000  1   
  9  0.9991857  0.9869429  1   

ROC was used to select the optimal model using the largest value.
The final value used for the model was k = 9.
knn_predictions_prob=predict(knn_credit_card, test_data, type="prob")
knn_predictions= prediction(knn_predictions_prob$Yes,test_data$Class)
performance(knn_predictions, measure = "auc")
A performance instance
  'Area under the ROC curve'
knn_predicted_labels = predict(knn_credit_card, test_data)
confusionMatrix(knn_predicted_labels, test_data$Class ,positive="Yes", mode="everything")
Confusion Matrix and Statistics

          Reference
Prediction    No   Yes
       No  14808     0
       Yes   192 15000
                                          
               Accuracy : 0.9936          
                 95% CI : (0.9926, 0.9945)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.9872          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 1.0000          
            Specificity : 0.9872          
         Pos Pred Value : 0.9874          
         Neg Pred Value : 1.0000          
              Precision : 0.9874          
                 Recall : 1.0000          
                     F1 : 0.9936          
             Prevalence : 0.5000          
         Detection Rate : 0.5000          
   Detection Prevalence : 0.5064          
      Balanced Accuracy : 0.9936          
                                          
       'Positive' Class : Yes             
                                          
plot(knn_predictions_prob)

Accuracy : 0.9938

set.seed(1)
knn_smote<- train(Class ~ ., data = train_data,trControl  =trainControl(method="cv", number=10, classProbs = TRUE, summaryFunction = twoClassSummary,sampling="smote") ,method = "knn",metric ="ROC")
Loading required package: recipes

Attaching package: ‘recipes’

The following object is masked from ‘package:stringr’:

    fixed

The following object is masked from ‘package:stats’:

    step
knn_smote
k-Nearest Neighbors 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 63000, 63000, 63000, 63000, 63000, 63000, ... 
Addtional sampling using SMOTE

Resampling results across tuning parameters:

  k  ROC        Sens       Spec
  5  0.9991857  0.9920571  1   
  7  0.9991857  0.9896000  1   
  9  0.9991857  0.9869429  1   

ROC was used to select the optimal model using the largest value.
The final value used for the model was k = 9.
knn_smote_predictions_prob=predict(knn_smote, test_data, type="prob")
knn_smote_predictions= prediction(knn_smote_predictions_prob$Yes,test_data$Class)

performance(knn_smote_predictions, measure = "auc")
A performance instance
  'Area under the ROC curve'
knn_smote_predicted_labels = predict(knn_smote, test_data)
confusionMatrix(knn_smote_predicted_labels, test_data$Class ,positive="Yes", mode="everything")
Confusion Matrix and Statistics

          Reference
Prediction    No   Yes
       No  14808     0
       Yes   192 15000
                                          
               Accuracy : 0.9936          
                 95% CI : (0.9926, 0.9945)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.9872          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 1.0000          
            Specificity : 0.9872          
         Pos Pred Value : 0.9874          
         Neg Pred Value : 1.0000          
              Precision : 0.9874          
                 Recall : 1.0000          
                     F1 : 0.9936          
             Prevalence : 0.5000          
         Detection Rate : 0.5000          
   Detection Prevalence : 0.5064          
      Balanced Accuracy : 0.9936          
                                          
       'Positive' Class : Yes             
                                          

Accuracy : 0.9938

#SVM

set.seed(1)

svm_credit<- train(Class ~ ., data = train_data,
             trControl = trainControl(method="cv", number=10, classProbs = TRUE, summaryFunction = twoClassSummary) ,method = "svmLinear",metric ="ROC")
svm_credit
Support Vector Machines with Linear Kernel 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 63000, 63000, 63000, 63000, 63000, 63000, ... 
Resampling results:

  ROC        Sens       Spec     
  0.9791728  0.9754857  0.9095714

Tuning parameter 'C' was held constant at a value of 1
svm_predictions_prob=predict(svm_credit, test_data, type="prob")
svm_predictions= prediction(svm_predictions_prob$Yes,test_data$Class)
svm_predictions_prob

performance(svm_predictions, measure = "auc")
A performance instance
  'Area under the ROC curve'
svm_predicted_labels = predict(svm_credit, test_data)
confusionMatrix(svm_predicted_labels, test_data$Class ,positive="Yes", mode="everything")
Confusion Matrix and Statistics

          Reference
Prediction    No   Yes
       No  14594  1329
       Yes   406 13671
                                          
               Accuracy : 0.9422          
                 95% CI : (0.9395, 0.9448)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.8843          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.9114          
            Specificity : 0.9729          
         Pos Pred Value : 0.9712          
         Neg Pred Value : 0.9165          
              Precision : 0.9712          
                 Recall : 0.9114          
                     F1 : 0.9403          
             Prevalence : 0.5000          
         Detection Rate : 0.4557          
   Detection Prevalence : 0.4692          
      Balanced Accuracy : 0.9422          
                                          
       'Positive' Class : Yes             
                                          

Accuracy : 0.9421

#SVM Smote

set.seed(1)
ctrl=trainControl(method="cv", number=10, classProbs = TRUE, summaryFunction = twoClassSummary, 
sampling="smote")
svm_smote<-train(Class ~ ., data = train_data, method = "svmLinear", verbose=FALSE, metric ="ROC", 
trControl= ctrl)
svm_smote
Support Vector Machines with Linear Kernel 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 63000, 63000, 63000, 63000, 63000, 63000, ... 
Addtional sampling using SMOTE

Resampling results:

  ROC        Sens       Spec     
  0.9791728  0.9754857  0.9095714

Tuning parameter 'C' was held constant at a value of 1
svm_smote_predictions_prob=predict(svm_smote, test_data, type="prob")
svm_smote_predictions= prediction(svm_smote_predictions_prob$Yes,test_data$Class)

performance(svm_smote_predictions, measure = "auc")
A performance instance
  'Area under the ROC curve'
svm_smote_predicted_labels = predict(svm_smote, test_data)
confusionMatrix(svm_smote_predicted_labels, test_data$Class ,positive="Yes", mode="everything")
Confusion Matrix and Statistics

          Reference
Prediction    No   Yes
       No  14594  1329
       Yes   406 13671
                                          
               Accuracy : 0.9422          
                 95% CI : (0.9395, 0.9448)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.8843          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.9114          
            Specificity : 0.9729          
         Pos Pred Value : 0.9712          
         Neg Pred Value : 0.9165          
              Precision : 0.9712          
                 Recall : 0.9114          
                     F1 : 0.9403          
             Prevalence : 0.5000          
         Detection Rate : 0.4557          
   Detection Prevalence : 0.4692          
      Balanced Accuracy : 0.9422          
                                          
       'Positive' Class : Yes             
                                          

Accuracy : 0.9421

SVM Radial

set.seed(1)
credit_rsvm_model <- train(Class ~ ., data = train_data, method = "svmRadial", trControl = trainControl(method = "cv", number = 10))
credit_rsvm_model
Support Vector Machines with Radial Basis Function Kernel 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 63000, 63000, 63000, 63000, 63000, 63000, ... 
Resampling results across tuning parameters:

  C     Accuracy   Kappa    
  0.25  0.9572714  0.9145429
  0.50  0.9648571  0.9297143
  1.00  0.9703429  0.9406857

Tuning parameter 'sigma' was held constant at a value of 1.044916
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were sigma = 1.044916 and C = 1.
credit_rsvm_pred <- predict(credit_rsvm_model, test_data)
credit_rsvm_cm <- confusionMatrix(credit_rsvm_pred, test_data$Class)
credit_rsvm_auc <- credit_rsvm_cm$overall[1] *100
credit_rsvm_acc <- credit_rsvm_cm$overall[4] *100
credit_rsvm_auc
Accuracy 
96.98333 

Accuracy 96.85

credit_rsvm_model
Support Vector Machines with Radial Basis Function Kernel 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 63000, 63000, 63000, 63000, 63000, 63000, ... 
Resampling results across tuning parameters:

  C     Accuracy   Kappa    
  0.25  0.9572714  0.9145429
  0.50  0.9648571  0.9297143
  1.00  0.9703429  0.9406857

Tuning parameter 'sigma' was held constant at a value of 1.044916
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were sigma = 1.044916 and C = 1.
credit_rsvm_cm
Confusion Matrix and Statistics

          Reference
Prediction    No   Yes
       No  14834   739
       Yes   166 14261
                                          
               Accuracy : 0.9698          
                 95% CI : (0.9678, 0.9717)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.9397          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.9889          
            Specificity : 0.9507          
         Pos Pred Value : 0.9525          
         Neg Pred Value : 0.9885          
             Prevalence : 0.5000          
         Detection Rate : 0.4945          
   Detection Prevalence : 0.5191          
      Balanced Accuracy : 0.9698          
                                          
       'Positive' Class : No              
                                          

#RandomForest

set.seed(1)
credit_rf_model <- train(Class ~ ., data = train_data, method = "rf",importance= T,metric = "ROC", trControl = trainControl(method = "cv", number = 10,  classProbs = TRUE))
Warning: The metric "ROC" was not in the result set. Accuracy will be used instead.
credit_rf_model
Random Forest 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 63000, 63000, 63000, 63000, 63000, 63000, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
  2     0.9998286  0.9996571
  5     0.9997000  0.9994000
  8     0.9989000  0.9978000

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 2.
credit_rf_pred <- predict(credit_rf_model, test_data)
credit_rf_cm <- confusionMatrix(credit_rf_pred, test_data$Class)
credit_rf_auc <- credit_rf_cm$overall[1]
credit_rf_acc <- credit_rf_cm$overall[4]
credit_rf_auc
Accuracy 
  0.9998 
varImp(credit_rf_model)
rf variable importance
credit_rf_model
Random Forest 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 63000, 63000, 63000, 63000, 63000, 63000, ... 
Resampling results across tuning parameters:

  mtry  Accuracy   Kappa    
  2     0.9998286  0.9996571
  5     0.9997000  0.9994000
  8     0.9989000  0.9978000

Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 2.
credit_rf_cm
Confusion Matrix and Statistics

          Reference
Prediction    No   Yes
       No  14994     0
       Yes     6 15000
                                          
               Accuracy : 0.9998          
                 95% CI : (0.9996, 0.9999)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2e-16         
                                          
                  Kappa : 0.9996          
                                          
 Mcnemar's Test P-Value : 0.04123         
                                          
            Sensitivity : 0.9996          
            Specificity : 1.0000          
         Pos Pred Value : 1.0000          
         Neg Pred Value : 0.9996          
             Prevalence : 0.5000          
         Detection Rate : 0.4998          
   Detection Prevalence : 0.4998          
      Balanced Accuracy : 0.9998          
                                          
       'Positive' Class : No              
                                          

#Gradient Boosted Tree model

set.seed(1)
credit_gbm_model <- train(Class ~ ., data = train_data, method = "gbm", trControl  = trainControl(method = "cv", number = 10))
Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2504             nan     0.1000    0.0680
     2        1.1398             nan     0.1000    0.0555
     3        1.0474             nan     0.1000    0.0463
     4        0.9692             nan     0.1000    0.0391
     5        0.9029             nan     0.1000    0.0331
     6        0.8464             nan     0.1000    0.0282
     7        0.7960             nan     0.1000    0.0251
     8        0.7512             nan     0.1000    0.0223
     9        0.7128             nan     0.1000    0.0193
    10        0.6779             nan     0.1000    0.0174
    20        0.4760             nan     0.1000    0.0060
    40        0.3520             nan     0.1000    0.0017
    60        0.3125             nan     0.1000    0.0008
    80        0.2941             nan     0.1000    0.0004
   100        0.2830             nan     0.1000    0.0001
   120        0.2746             nan     0.1000    0.0002
   140        0.2672             nan     0.1000    0.0000
   150        0.2647             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2431             nan     0.1000    0.0717
     2        1.1259             nan     0.1000    0.0585
     3        1.0281             nan     0.1000    0.0489
     4        0.9441             nan     0.1000    0.0419
     5        0.8748             nan     0.1000    0.0346
     6        0.8143             nan     0.1000    0.0300
     7        0.7589             nan     0.1000    0.0277
     8        0.7116             nan     0.1000    0.0235
     9        0.6699             nan     0.1000    0.0210
    10        0.6339             nan     0.1000    0.0180
    20        0.4280             nan     0.1000    0.0069
    40        0.2994             nan     0.1000    0.0015
    60        0.2631             nan     0.1000    0.0006
    80        0.2420             nan     0.1000    0.0002
   100        0.2275             nan     0.1000    0.0002
   120        0.2140             nan     0.1000    0.0003
   140        0.2008             nan     0.1000    0.0002
   150        0.1953             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2401             nan     0.1000    0.0732
     2        1.1207             nan     0.1000    0.0600
     3        1.0206             nan     0.1000    0.0498
     4        0.9366             nan     0.1000    0.0420
     5        0.8628             nan     0.1000    0.0367
     6        0.7974             nan     0.1000    0.0327
     7        0.7434             nan     0.1000    0.0272
     8        0.6943             nan     0.1000    0.0245
     9        0.6531             nan     0.1000    0.0205
    10        0.6153             nan     0.1000    0.0187
    20        0.3995             nan     0.1000    0.0069
    40        0.2742             nan     0.1000    0.0012
    60        0.2335             nan     0.1000    0.0003
    80        0.2093             nan     0.1000    0.0005
   100        0.1892             nan     0.1000    0.0003
   120        0.1747             nan     0.1000    0.0002
   140        0.1591             nan     0.1000    0.0002
   150        0.1534             nan     0.1000    0.0003

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2501             nan     0.1000    0.0681
     2        1.1389             nan     0.1000    0.0555
     3        1.0459             nan     0.1000    0.0462
     4        0.9677             nan     0.1000    0.0390
     5        0.9016             nan     0.1000    0.0330
     6        0.8444             nan     0.1000    0.0283
     7        0.7943             nan     0.1000    0.0252
     8        0.7504             nan     0.1000    0.0219
     9        0.7121             nan     0.1000    0.0192
    10        0.6782             nan     0.1000    0.0169
    20        0.4747             nan     0.1000    0.0063
    40        0.3528             nan     0.1000    0.0010
    60        0.3128             nan     0.1000    0.0006
    80        0.2937             nan     0.1000    0.0006
   100        0.2820             nan     0.1000    0.0002
   120        0.2739             nan     0.1000    0.0001
   140        0.2681             nan     0.1000    0.0001
   150        0.2657             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2427             nan     0.1000    0.0717
     2        1.1254             nan     0.1000    0.0585
     3        1.0259             nan     0.1000    0.0497
     4        0.9445             nan     0.1000    0.0406
     5        0.8722             nan     0.1000    0.0360
     6        0.8105             nan     0.1000    0.0307
     7        0.7580             nan     0.1000    0.0262
     8        0.7136             nan     0.1000    0.0221
     9        0.6708             nan     0.1000    0.0214
    10        0.6337             nan     0.1000    0.0184
    20        0.4229             nan     0.1000    0.0057
    40        0.2992             nan     0.1000    0.0012
    60        0.2614             nan     0.1000    0.0008
    80        0.2408             nan     0.1000    0.0005
   100        0.2254             nan     0.1000    0.0003
   120        0.2124             nan     0.1000    0.0003
   140        0.2002             nan     0.1000    0.0000
   150        0.1947             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2400             nan     0.1000    0.0731
     2        1.1198             nan     0.1000    0.0599
     3        1.0199             nan     0.1000    0.0497
     4        0.9354             nan     0.1000    0.0422
     5        0.8618             nan     0.1000    0.0367
     6        0.7993             nan     0.1000    0.0313
     7        0.7465             nan     0.1000    0.0263
     8        0.6958             nan     0.1000    0.0254
     9        0.6527             nan     0.1000    0.0216
    10        0.6150             nan     0.1000    0.0186
    20        0.3974             nan     0.1000    0.0071
    40        0.2729             nan     0.1000    0.0012
    60        0.2303             nan     0.1000    0.0004
    80        0.2043             nan     0.1000    0.0004
   100        0.1861             nan     0.1000    0.0001
   120        0.1696             nan     0.1000    0.0002
   140        0.1570             nan     0.1000    0.0004
   150        0.1513             nan     0.1000    0.0002

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2495             nan     0.1000    0.0681
     2        1.1385             nan     0.1000    0.0557
     3        1.0456             nan     0.1000    0.0464
     4        0.9676             nan     0.1000    0.0391
     5        0.9012             nan     0.1000    0.0332
     6        0.8448             nan     0.1000    0.0283
     7        0.7945             nan     0.1000    0.0252
     8        0.7498             nan     0.1000    0.0223
     9        0.7111             nan     0.1000    0.0192
    10        0.6765             nan     0.1000    0.0171
    20        0.4745             nan     0.1000    0.0061
    40        0.3528             nan     0.1000    0.0011
    60        0.3127             nan     0.1000    0.0006
    80        0.2945             nan     0.1000    0.0001
   100        0.2819             nan     0.1000    0.0003
   120        0.2736             nan     0.1000    0.0002
   140        0.2670             nan     0.1000    0.0001
   150        0.2644             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2426             nan     0.1000    0.0718
     2        1.1254             nan     0.1000    0.0586
     3        1.0263             nan     0.1000    0.0498
     4        0.9444             nan     0.1000    0.0409
     5        0.8724             nan     0.1000    0.0361
     6        0.8124             nan     0.1000    0.0301
     7        0.7570             nan     0.1000    0.0276
     8        0.7119             nan     0.1000    0.0226
     9        0.6693             nan     0.1000    0.0211
    10        0.6351             nan     0.1000    0.0168
    20        0.4228             nan     0.1000    0.0074
    40        0.2974             nan     0.1000    0.0016
    60        0.2618             nan     0.1000    0.0006
    80        0.2432             nan     0.1000    0.0004
   100        0.2278             nan     0.1000    0.0002
   120        0.2147             nan     0.1000    0.0002
   140        0.2003             nan     0.1000    0.0001
   150        0.1938             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2398             nan     0.1000    0.0731
     2        1.1200             nan     0.1000    0.0602
     3        1.0199             nan     0.1000    0.0499
     4        0.9319             nan     0.1000    0.0442
     5        0.8589             nan     0.1000    0.0364
     6        0.7966             nan     0.1000    0.0310
     7        0.7426             nan     0.1000    0.0268
     8        0.6931             nan     0.1000    0.0246
     9        0.6512             nan     0.1000    0.0210
    10        0.6138             nan     0.1000    0.0188
    20        0.3980             nan     0.1000    0.0062
    40        0.2724             nan     0.1000    0.0016
    60        0.2293             nan     0.1000    0.0004
    80        0.2071             nan     0.1000    0.0004
   100        0.1893             nan     0.1000    0.0006
   120        0.1732             nan     0.1000    0.0004
   140        0.1582             nan     0.1000    0.0003
   150        0.1529             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2500             nan     0.1000    0.0681
     2        1.1390             nan     0.1000    0.0555
     3        1.0467             nan     0.1000    0.0462
     4        0.9687             nan     0.1000    0.0390
     5        0.9025             nan     0.1000    0.0331
     6        0.8457             nan     0.1000    0.0284
     7        0.7956             nan     0.1000    0.0253
     8        0.7519             nan     0.1000    0.0219
     9        0.7130             nan     0.1000    0.0194
    10        0.6793             nan     0.1000    0.0168
    20        0.4762             nan     0.1000    0.0062
    40        0.3541             nan     0.1000    0.0011
    60        0.3143             nan     0.1000    0.0006
    80        0.2955             nan     0.1000    0.0004
   100        0.2840             nan     0.1000    0.0001
   120        0.2759             nan     0.1000    0.0001
   140        0.2695             nan     0.1000    0.0001
   150        0.2669             nan     0.1000    0.0002

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2431             nan     0.1000    0.0715
     2        1.1259             nan     0.1000    0.0586
     3        1.0267             nan     0.1000    0.0495
     4        0.9454             nan     0.1000    0.0406
     5        0.8752             nan     0.1000    0.0349
     6        0.8130             nan     0.1000    0.0311
     7        0.7610             nan     0.1000    0.0258
     8        0.7122             nan     0.1000    0.0243
     9        0.6704             nan     0.1000    0.0208
    10        0.6358             nan     0.1000    0.0172
    20        0.4269             nan     0.1000    0.0064
    40        0.3016             nan     0.1000    0.0013
    60        0.2635             nan     0.1000    0.0004
    80        0.2422             nan     0.1000    0.0006
   100        0.2250             nan     0.1000    0.0003
   120        0.2137             nan     0.1000    0.0003
   140        0.2010             nan     0.1000    0.0003
   150        0.1930             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2397             nan     0.1000    0.0729
     2        1.1207             nan     0.1000    0.0599
     3        1.0207             nan     0.1000    0.0497
     4        0.9329             nan     0.1000    0.0439
     5        0.8598             nan     0.1000    0.0367
     6        0.7981             nan     0.1000    0.0308
     7        0.7411             nan     0.1000    0.0284
     8        0.6942             nan     0.1000    0.0234
     9        0.6513             nan     0.1000    0.0214
    10        0.6161             nan     0.1000    0.0175
    20        0.3998             nan     0.1000    0.0072
    40        0.2743             nan     0.1000    0.0014
    60        0.2305             nan     0.1000    0.0003
    80        0.2058             nan     0.1000    0.0005
   100        0.1866             nan     0.1000    0.0003
   120        0.1717             nan     0.1000    0.0004
   140        0.1606             nan     0.1000    0.0005
   150        0.1559             nan     0.1000    0.0003

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2504             nan     0.1000    0.0679
     2        1.1384             nan     0.1000    0.0555
     3        1.0460             nan     0.1000    0.0463
     4        0.9681             nan     0.1000    0.0388
     5        0.9020             nan     0.1000    0.0331
     6        0.8451             nan     0.1000    0.0283
     7        0.7947             nan     0.1000    0.0251
     8        0.7505             nan     0.1000    0.0220
     9        0.7119             nan     0.1000    0.0192
    10        0.6770             nan     0.1000    0.0174
    20        0.4757             nan     0.1000    0.0059
    40        0.3535             nan     0.1000    0.0018
    60        0.3136             nan     0.1000    0.0005
    80        0.2942             nan     0.1000    0.0005
   100        0.2841             nan     0.1000    0.0003
   120        0.2748             nan     0.1000    0.0002
   140        0.2690             nan     0.1000    0.0001
   150        0.2662             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2432             nan     0.1000    0.0716
     2        1.1259             nan     0.1000    0.0585
     3        1.0267             nan     0.1000    0.0496
     4        0.9448             nan     0.1000    0.0410
     5        0.8743             nan     0.1000    0.0350
     6        0.8148             nan     0.1000    0.0295
     7        0.7593             nan     0.1000    0.0275
     8        0.7119             nan     0.1000    0.0238
     9        0.6728             nan     0.1000    0.0196
    10        0.6357             nan     0.1000    0.0185
    20        0.4221             nan     0.1000    0.0070
    40        0.3016             nan     0.1000    0.0016
    60        0.2643             nan     0.1000    0.0004
    80        0.2449             nan     0.1000    0.0003
   100        0.2276             nan     0.1000    0.0003
   120        0.2138             nan     0.1000    0.0003
   140        0.2039             nan     0.1000    0.0004
   150        0.1991             nan     0.1000    0.0002

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2398             nan     0.1000    0.0732
     2        1.1199             nan     0.1000    0.0599
     3        1.0196             nan     0.1000    0.0498
     4        0.9360             nan     0.1000    0.0418
     5        0.8622             nan     0.1000    0.0369
     6        0.7960             nan     0.1000    0.0329
     7        0.7418             nan     0.1000    0.0269
     8        0.6948             nan     0.1000    0.0233
     9        0.6520             nan     0.1000    0.0214
    10        0.6137             nan     0.1000    0.0193
    20        0.3980             nan     0.1000    0.0065
    40        0.2743             nan     0.1000    0.0017
    60        0.2317             nan     0.1000    0.0006
    80        0.2064             nan     0.1000    0.0003
   100        0.1867             nan     0.1000    0.0006
   120        0.1722             nan     0.1000    0.0003
   140        0.1581             nan     0.1000    0.0001
   150        0.1540             nan     0.1000    0.0003

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2507             nan     0.1000    0.0678
     2        1.1406             nan     0.1000    0.0554
     3        1.0483             nan     0.1000    0.0463
     4        0.9701             nan     0.1000    0.0390
     5        0.9038             nan     0.1000    0.0331
     6        0.8471             nan     0.1000    0.0282
     7        0.7963             nan     0.1000    0.0253
     8        0.7524             nan     0.1000    0.0219
     9        0.7136             nan     0.1000    0.0193
    10        0.6791             nan     0.1000    0.0172
    20        0.4775             nan     0.1000    0.0060
    40        0.3551             nan     0.1000    0.0011
    60        0.3145             nan     0.1000    0.0006
    80        0.2953             nan     0.1000    0.0007
   100        0.2838             nan     0.1000    0.0004
   120        0.2757             nan     0.1000    0.0001
   140        0.2695             nan     0.1000    0.0000
   150        0.2660             nan     0.1000    0.0002

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2429             nan     0.1000    0.0714
     2        1.1257             nan     0.1000    0.0586
     3        1.0265             nan     0.1000    0.0496
     4        0.9440             nan     0.1000    0.0411
     5        0.8743             nan     0.1000    0.0347
     6        0.8137             nan     0.1000    0.0301
     7        0.7588             nan     0.1000    0.0276
     8        0.7115             nan     0.1000    0.0235
     9        0.6703             nan     0.1000    0.0206
    10        0.6330             nan     0.1000    0.0186
    20        0.4273             nan     0.1000    0.0060
    40        0.3020             nan     0.1000    0.0013
    60        0.2630             nan     0.1000    0.0008
    80        0.2422             nan     0.1000    0.0004
   100        0.2253             nan     0.1000    0.0001
   120        0.2124             nan     0.1000    0.0002
   140        0.2004             nan     0.1000    0.0002
   150        0.1952             nan     0.1000    0.0002

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2403             nan     0.1000    0.0730
     2        1.1203             nan     0.1000    0.0597
     3        1.0204             nan     0.1000    0.0498
     4        0.9360             nan     0.1000    0.0421
     5        0.8607             nan     0.1000    0.0379
     6        0.7993             nan     0.1000    0.0307
     7        0.7451             nan     0.1000    0.0270
     8        0.6954             nan     0.1000    0.0248
     9        0.6523             nan     0.1000    0.0214
    10        0.6158             nan     0.1000    0.0182
    20        0.4009             nan     0.1000    0.0061
    40        0.2760             nan     0.1000    0.0018
    60        0.2331             nan     0.1000    0.0007
    80        0.2094             nan     0.1000    0.0006
   100        0.1897             nan     0.1000    0.0005
   120        0.1730             nan     0.1000    0.0002
   140        0.1611             nan     0.1000    0.0005
   150        0.1552             nan     0.1000    0.0003

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2501             nan     0.1000    0.0681
     2        1.1394             nan     0.1000    0.0556
     3        1.0468             nan     0.1000    0.0464
     4        0.9684             nan     0.1000    0.0392
     5        0.9026             nan     0.1000    0.0331
     6        0.8458             nan     0.1000    0.0281
     7        0.7948             nan     0.1000    0.0253
     8        0.7498             nan     0.1000    0.0224
     9        0.7114             nan     0.1000    0.0193
    10        0.6767             nan     0.1000    0.0173
    20        0.4759             nan     0.1000    0.0057
    40        0.3520             nan     0.1000    0.0019
    60        0.3139             nan     0.1000    0.0006
    80        0.2936             nan     0.1000    0.0003
   100        0.2834             nan     0.1000    0.0001
   120        0.2745             nan     0.1000    0.0002
   140        0.2695             nan     0.1000    0.0000
   150        0.2653             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2434             nan     0.1000    0.0719
     2        1.1260             nan     0.1000    0.0587
     3        1.0263             nan     0.1000    0.0498
     4        0.9449             nan     0.1000    0.0405
     5        0.8731             nan     0.1000    0.0359
     6        0.8133             nan     0.1000    0.0300
     7        0.7613             nan     0.1000    0.0258
     8        0.7131             nan     0.1000    0.0241
     9        0.6711             nan     0.1000    0.0208
    10        0.6339             nan     0.1000    0.0184
    20        0.4265             nan     0.1000    0.0061
    40        0.2992             nan     0.1000    0.0010
    60        0.2630             nan     0.1000    0.0003
    80        0.2432             nan     0.1000    0.0001
   100        0.2253             nan     0.1000    0.0003
   120        0.2125             nan     0.1000    0.0003
   140        0.2003             nan     0.1000    0.0001
   150        0.1954             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2396             nan     0.1000    0.0733
     2        1.1197             nan     0.1000    0.0599
     3        1.0197             nan     0.1000    0.0501
     4        0.9349             nan     0.1000    0.0421
     5        0.8589             nan     0.1000    0.0379
     6        0.7954             nan     0.1000    0.0318
     7        0.7414             nan     0.1000    0.0269
     8        0.6951             nan     0.1000    0.0230
     9        0.6520             nan     0.1000    0.0217
    10        0.6130             nan     0.1000    0.0195
    20        0.3973             nan     0.1000    0.0072
    40        0.2750             nan     0.1000    0.0015
    60        0.2283             nan     0.1000    0.0009
    80        0.2021             nan     0.1000    0.0009
   100        0.1828             nan     0.1000    0.0005
   120        0.1719             nan     0.1000    0.0003
   140        0.1583             nan     0.1000    0.0002
   150        0.1517             nan     0.1000    0.0004

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2503             nan     0.1000    0.0680
     2        1.1394             nan     0.1000    0.0554
     3        1.0468             nan     0.1000    0.0462
     4        0.9690             nan     0.1000    0.0389
     5        0.9026             nan     0.1000    0.0329
     6        0.8464             nan     0.1000    0.0283
     7        0.7956             nan     0.1000    0.0254
     8        0.7510             nan     0.1000    0.0222
     9        0.7122             nan     0.1000    0.0193
    10        0.6772             nan     0.1000    0.0173
    20        0.4757             nan     0.1000    0.0061
    40        0.3535             nan     0.1000    0.0018
    60        0.3147             nan     0.1000    0.0004
    80        0.2940             nan     0.1000    0.0002
   100        0.2829             nan     0.1000    0.0001
   120        0.2745             nan     0.1000    0.0001
   140        0.2677             nan     0.1000    0.0002
   150        0.2654             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2433             nan     0.1000    0.0715
     2        1.1262             nan     0.1000    0.0584
     3        1.0271             nan     0.1000    0.0496
     4        0.9448             nan     0.1000    0.0409
     5        0.8747             nan     0.1000    0.0348
     6        0.8125             nan     0.1000    0.0310
     7        0.7580             nan     0.1000    0.0275
     8        0.7135             nan     0.1000    0.0224
     9        0.6717             nan     0.1000    0.0209
    10        0.6355             nan     0.1000    0.0180
    20        0.4258             nan     0.1000    0.0064
    40        0.3000             nan     0.1000    0.0017
    60        0.2612             nan     0.1000    0.0007
    80        0.2417             nan     0.1000    0.0002
   100        0.2244             nan     0.1000    0.0004
   120        0.2111             nan     0.1000    0.0002
   140        0.1987             nan     0.1000    0.0002
   150        0.1935             nan     0.1000    0.0002

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2399             nan     0.1000    0.0729
     2        1.1207             nan     0.1000    0.0598
     3        1.0203             nan     0.1000    0.0502
     4        0.9347             nan     0.1000    0.0428
     5        0.8614             nan     0.1000    0.0367
     6        0.7962             nan     0.1000    0.0326
     7        0.7416             nan     0.1000    0.0269
     8        0.6926             nan     0.1000    0.0245
     9        0.6518             nan     0.1000    0.0204
    10        0.6145             nan     0.1000    0.0187
    20        0.3981             nan     0.1000    0.0064
    40        0.2743             nan     0.1000    0.0016
    60        0.2290             nan     0.1000    0.0006
    80        0.2044             nan     0.1000    0.0007
   100        0.1863             nan     0.1000    0.0004
   120        0.1704             nan     0.1000    0.0005
   140        0.1582             nan     0.1000    0.0003
   150        0.1515             nan     0.1000    0.0003

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2505             nan     0.1000    0.0680
     2        1.1395             nan     0.1000    0.0556
     3        1.0472             nan     0.1000    0.0461
     4        0.9684             nan     0.1000    0.0394
     5        0.9017             nan     0.1000    0.0332
     6        0.8453             nan     0.1000    0.0282
     7        0.7946             nan     0.1000    0.0253
     8        0.7497             nan     0.1000    0.0222
     9        0.7110             nan     0.1000    0.0193
    10        0.6769             nan     0.1000    0.0170
    20        0.4759             nan     0.1000    0.0063
    40        0.3546             nan     0.1000    0.0011
    60        0.3153             nan     0.1000    0.0006
    80        0.2953             nan     0.1000    0.0005
   100        0.2843             nan     0.1000    0.0001
   120        0.2756             nan     0.1000    0.0001
   140        0.2683             nan     0.1000    0.0002
   150        0.2653             nan     0.1000    0.0000

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2429             nan     0.1000    0.0716
     2        1.1253             nan     0.1000    0.0585
     3        1.0261             nan     0.1000    0.0497
     4        0.9446             nan     0.1000    0.0407
     5        0.8729             nan     0.1000    0.0359
     6        0.8133             nan     0.1000    0.0297
     7        0.7581             nan     0.1000    0.0275
     8        0.7111             nan     0.1000    0.0235
     9        0.6698             nan     0.1000    0.0206
    10        0.6317             nan     0.1000    0.0189
    20        0.4229             nan     0.1000    0.0070
    40        0.2990             nan     0.1000    0.0016
    60        0.2623             nan     0.1000    0.0004
    80        0.2429             nan     0.1000    0.0005
   100        0.2242             nan     0.1000    0.0006
   120        0.2109             nan     0.1000    0.0004
   140        0.2020             nan     0.1000    0.0002
   150        0.1939             nan     0.1000    0.0002

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2401             nan     0.1000    0.0732
     2        1.1200             nan     0.1000    0.0600
     3        1.0198             nan     0.1000    0.0499
     4        0.9354             nan     0.1000    0.0421
     5        0.8591             nan     0.1000    0.0379
     6        0.7969             nan     0.1000    0.0311
     7        0.7420             nan     0.1000    0.0274
     8        0.6926             nan     0.1000    0.0247
     9        0.6519             nan     0.1000    0.0203
    10        0.6163             nan     0.1000    0.0176
    20        0.3984             nan     0.1000    0.0072
    40        0.2769             nan     0.1000    0.0018
    60        0.2309             nan     0.1000    0.0008
    80        0.2057             nan     0.1000    0.0008
   100        0.1864             nan     0.1000    0.0004
   120        0.1711             nan     0.1000    0.0003
   140        0.1577             nan     0.1000    0.0003
   150        0.1513             nan     0.1000    0.0002

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2504             nan     0.1000    0.0682
     2        1.1391             nan     0.1000    0.0557
     3        1.0462             nan     0.1000    0.0465
     4        0.9678             nan     0.1000    0.0391
     5        0.9018             nan     0.1000    0.0328
     6        0.8449             nan     0.1000    0.0283
     7        0.7937             nan     0.1000    0.0256
     8        0.7490             nan     0.1000    0.0223
     9        0.7101             nan     0.1000    0.0193
    10        0.6756             nan     0.1000    0.0170
    20        0.4751             nan     0.1000    0.0061
    40        0.3511             nan     0.1000    0.0019
    60        0.3122             nan     0.1000    0.0009
    80        0.2939             nan     0.1000    0.0006
   100        0.2816             nan     0.1000    0.0003
   120        0.2731             nan     0.1000    0.0002
   140        0.2662             nan     0.1000    0.0001
   150        0.2636             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2424             nan     0.1000    0.0717
     2        1.1246             nan     0.1000    0.0587
     3        1.0268             nan     0.1000    0.0488
     4        0.9429             nan     0.1000    0.0419
     5        0.8731             nan     0.1000    0.0347
     6        0.8109             nan     0.1000    0.0309
     7        0.7584             nan     0.1000    0.0261
     8        0.7100             nan     0.1000    0.0241
     9        0.6706             nan     0.1000    0.0198
    10        0.6340             nan     0.1000    0.0184
    20        0.4254             nan     0.1000    0.0078
    40        0.2991             nan     0.1000    0.0011
    60        0.2612             nan     0.1000    0.0004
    80        0.2418             nan     0.1000    0.0004
   100        0.2255             nan     0.1000    0.0005
   120        0.2128             nan     0.1000    0.0002
   140        0.1999             nan     0.1000    0.0002
   150        0.1943             nan     0.1000    0.0001

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2398             nan     0.1000    0.0735
     2        1.1199             nan     0.1000    0.0599
     3        1.0195             nan     0.1000    0.0501
     4        0.9332             nan     0.1000    0.0431
     5        0.8608             nan     0.1000    0.0361
     6        0.7951             nan     0.1000    0.0328
     7        0.7383             nan     0.1000    0.0284
     8        0.6895             nan     0.1000    0.0245
     9        0.6478             nan     0.1000    0.0208
    10        0.6120             nan     0.1000    0.0179
    20        0.3959             nan     0.1000    0.0070
    40        0.2699             nan     0.1000    0.0014
    60        0.2279             nan     0.1000    0.0006
    80        0.2024             nan     0.1000    0.0007
   100        0.1855             nan     0.1000    0.0002
   120        0.1706             nan     0.1000    0.0009
   140        0.1558             nan     0.1000    0.0003
   150        0.1489             nan     0.1000    0.0003

Iter   TrainDeviance   ValidDeviance   StepSize   Improve
     1        1.2399             nan     0.1000    0.0732
     2        1.1198             nan     0.1000    0.0600
     3        1.0201             nan     0.1000    0.0496
     4        0.9322             nan     0.1000    0.0441
     5        0.8601             nan     0.1000    0.0360
     6        0.7966             nan     0.1000    0.0316
     7        0.7434             nan     0.1000    0.0267
     8        0.6959             nan     0.1000    0.0236
     9        0.6526             nan     0.1000    0.0216
    10        0.6134             nan     0.1000    0.0195
    20        0.3973             nan     0.1000    0.0067
    40        0.2727             nan     0.1000    0.0019
    60        0.2271             nan     0.1000    0.0007
    80        0.2037             nan     0.1000    0.0009
   100        0.1866             nan     0.1000    0.0002
   120        0.1717             nan     0.1000    0.0004
   140        0.1587             nan     0.1000    0.0008
   150        0.1509             nan     0.1000    0.0003
credit_gbm_predictions <- predict(credit_gbm_model,test_data)
credit_gbm_cm <- confusionMatrix(credit_gbm_predictions, test_data$Class)
credit_gbm_auc <- credit_gbm_cm$overall[1]
credit_gbm_auc
Accuracy 
  0.9745 
credit_gbm_acc <- credit_gbm_cm$overall[4]
credit_gbm_acc
AccuracyUpper 
    0.9762544 

Accuracy : 0.9698

credit_gbm_model
Stochastic Gradient Boosting 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 63000, 63000, 63000, 63000, 63000, 63000, ... 
Resampling results across tuning parameters:

  interaction.depth  n.trees  Accuracy   Kappa    
  1                   50      0.9426857  0.8853714
  1                  100      0.9458286  0.8916571
  1                  150      0.9481714  0.8963429
  2                   50      0.9484000  0.8968000
  2                  100      0.9568143  0.9136286
  2                  150      0.9626000  0.9252000
  3                   50      0.9553571  0.9107143
  3                  100      0.9643714  0.9287429
  3                  150      0.9719429  0.9438857

Tuning parameter 'shrinkage' was held constant at a value of 0.1
Tuning parameter 'n.minobsinnode' was held
 constant at a value of 10
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were n.trees = 150, interaction.depth = 3, shrinkage = 0.1 and n.minobsinnode
 = 10.
credit_gbm_cm
Confusion Matrix and Statistics

          Reference
Prediction    No   Yes
       No  14775   540
       Yes   225 14460
                                          
               Accuracy : 0.9745          
                 95% CI : (0.9727, 0.9763)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.949           
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.9850          
            Specificity : 0.9640          
         Pos Pred Value : 0.9647          
         Neg Pred Value : 0.9847          
             Prevalence : 0.5000          
         Detection Rate : 0.4925          
   Detection Prevalence : 0.5105          
      Balanced Accuracy : 0.9745          
                                          
       'Positive' Class : No              
                                          

XGBoost

set.seed(1)
credit_xgb_model <- train(Class ~ ., data = train_data, method = "xgbTree", trControl = trainControl(method = "cv", number = 10),verbosity = 0)
credit_xgb_model
eXtreme Gradient Boosting 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 63000, 63000, 63000, 63000, 63000, 63000, ... 
Resampling results across tuning parameters:

  eta  max_depth  colsample_bytree  subsample  nrounds  Accuracy   Kappa    
  0.3  1          0.6               0.50        50      0.9478286  0.8956571
  0.3  1          0.6               0.50       100      0.9522429  0.9044857
  0.3  1          0.6               0.50       150      0.9573714  0.9147429
  0.3  1          0.6               0.75        50      0.9480286  0.8960571
  0.3  1          0.6               0.75       100      0.9518286  0.9036571
  0.3  1          0.6               0.75       150      0.9557143  0.9114286
  0.3  1          0.6               1.00        50      0.9476714  0.8953429
  0.3  1          0.6               1.00       100      0.9516429  0.9032857
  0.3  1          0.6               1.00       150      0.9554000  0.9108000
  0.3  1          0.8               0.50        50      0.9477714  0.8955429
  0.3  1          0.8               0.50       100      0.9519429  0.9038857
  0.3  1          0.8               0.50       150      0.9569714  0.9139429
  0.3  1          0.8               0.75        50      0.9483000  0.8966000
  0.3  1          0.8               0.75       100      0.9521571  0.9043143
  0.3  1          0.8               0.75       150      0.9563000  0.9126000
  0.3  1          0.8               1.00        50      0.9476714  0.8953429
  0.3  1          0.8               1.00       100      0.9525714  0.9051429
  0.3  1          0.8               1.00       150      0.9552429  0.9104857
  0.3  2          0.6               0.50        50      0.9679571  0.9359143
  0.3  2          0.6               0.50       100      0.9910429  0.9820857
  0.3  2          0.6               0.50       150      0.9963429  0.9926857
  0.3  2          0.6               0.75        50      0.9694714  0.9389429
  0.3  2          0.6               0.75       100      0.9912286  0.9824571
  0.3  2          0.6               0.75       150      0.9964429  0.9928857
  0.3  2          0.6               1.00        50      0.9695429  0.9390857
  0.3  2          0.6               1.00       100      0.9920286  0.9840571
  0.3  2          0.6               1.00       150      0.9964143  0.9928286
  0.3  2          0.8               0.50        50      0.9711143  0.9422286
  0.3  2          0.8               0.50       100      0.9924429  0.9848857
  0.3  2          0.8               0.50       150      0.9967571  0.9935143
  0.3  2          0.8               0.75        50      0.9725143  0.9450286
  0.3  2          0.8               0.75       100      0.9926429  0.9852857
  0.3  2          0.8               0.75       150      0.9965429  0.9930857
  0.3  2          0.8               1.00        50      0.9710857  0.9421714
  0.3  2          0.8               1.00       100      0.9924714  0.9849429
  0.3  2          0.8               1.00       150      0.9965714  0.9931429
  0.3  3          0.6               0.50        50      0.9933714  0.9867429
  0.3  3          0.6               0.50       100      0.9979571  0.9959143
  0.3  3          0.6               0.50       150      0.9987857  0.9975714
  0.3  3          0.6               0.75        50      0.9936143  0.9872286
  0.3  3          0.6               0.75       100      0.9977857  0.9955714
  0.3  3          0.6               0.75       150      0.9988429  0.9976857
  0.3  3          0.6               1.00        50      0.9939857  0.9879714
  0.3  3          0.6               1.00       100      0.9979857  0.9959714
  0.3  3          0.6               1.00       150      0.9990571  0.9981143
  0.3  3          0.8               0.50        50      0.9945286  0.9890571
  0.3  3          0.8               0.50       100      0.9980143  0.9960286
  0.3  3          0.8               0.50       150      0.9990571  0.9981143
  0.3  3          0.8               0.75        50      0.9943714  0.9887429
  0.3  3          0.8               0.75       100      0.9983143  0.9966286
  0.3  3          0.8               0.75       150      0.9990286  0.9980571
  0.3  3          0.8               1.00        50      0.9940571  0.9881143
  0.3  3          0.8               1.00       100      0.9981000  0.9962000
  0.3  3          0.8               1.00       150      0.9989571  0.9979143
  0.4  1          0.6               0.50        50      0.9500571  0.9001143
  0.4  1          0.6               0.50       100      0.9559571  0.9119143
  0.4  1          0.6               0.50       150      0.9603857  0.9207714
  0.4  1          0.6               0.75        50      0.9496000  0.8992000
  0.4  1          0.6               0.75       100      0.9551571  0.9103143
  0.4  1          0.6               0.75       150      0.9605714  0.9211429
  0.4  1          0.6               1.00        50      0.9492000  0.8984000
  0.4  1          0.6               1.00       100      0.9538143  0.9076286
  0.4  1          0.6               1.00       150      0.9598429  0.9196857
  0.4  1          0.8               0.50        50      0.9501714  0.9003429
  0.4  1          0.8               0.50       100      0.9560143  0.9120286
  0.4  1          0.8               0.50       150      0.9613571  0.9227143
  0.4  1          0.8               0.75        50      0.9504000  0.9008000
  0.4  1          0.8               0.75       100      0.9561286  0.9122571
  0.4  1          0.8               0.75       150      0.9596143  0.9192286
  0.4  1          0.8               1.00        50      0.9490857  0.8981714
  0.4  1          0.8               1.00       100      0.9542714  0.9085429
  0.4  1          0.8               1.00       150      0.9597286  0.9194571
  0.4  2          0.6               0.50        50      0.9801857  0.9603714
  0.4  2          0.6               0.50       100      0.9955857  0.9911714
  0.4  2          0.6               0.50       150      0.9974857  0.9949714
  0.4  2          0.6               0.75        50      0.9816286  0.9632571
  0.4  2          0.6               0.75       100      0.9956000  0.9912000
  0.4  2          0.6               0.75       150      0.9974286  0.9948571
  0.4  2          0.6               1.00        50      0.9815143  0.9630286
  0.4  2          0.6               1.00       100      0.9956000  0.9912000
  0.4  2          0.6               1.00       150      0.9975714  0.9951429
  0.4  2          0.8               0.50        50      0.9822714  0.9645429
  0.4  2          0.8               0.50       100      0.9955857  0.9911714
  0.4  2          0.8               0.50       150      0.9973857  0.9947714
  0.4  2          0.8               0.75        50      0.9811286  0.9622571
  0.4  2          0.8               0.75       100      0.9956429  0.9912857
  0.4  2          0.8               0.75       150      0.9974429  0.9948857
  0.4  2          0.8               1.00        50      0.9809429  0.9618857
  0.4  2          0.8               1.00       100      0.9956571  0.9913143
  0.4  2          0.8               1.00       150      0.9975000  0.9950000
  0.4  3          0.6               0.50        50      0.9964000  0.9928000
  0.4  3          0.6               0.50       100      0.9985857  0.9971714
  0.4  3          0.6               0.50       150      0.9991857  0.9983714
  0.4  3          0.6               0.75        50      0.9965286  0.9930571
  0.4  3          0.6               0.75       100      0.9987857  0.9975714
  0.4  3          0.6               0.75       150      0.9992286  0.9984571
  0.4  3          0.6               1.00        50      0.9963286  0.9926571
  0.4  3          0.6               1.00       100      0.9985143  0.9970286
  0.4  3          0.6               1.00       150      0.9992429  0.9984857
  0.4  3          0.8               0.50        50      0.9969714  0.9939429
  0.4  3          0.8               0.50       100      0.9988143  0.9976286
  0.4  3          0.8               0.50       150      0.9991571  0.9983143
  0.4  3          0.8               0.75        50      0.9966286  0.9932571
  0.4  3          0.8               0.75       100      0.9987571  0.9975143
  0.4  3          0.8               0.75       150      0.9993571  0.9987143
  0.4  3          0.8               1.00        50      0.9964857  0.9929714
  0.4  3          0.8               1.00       100      0.9988571  0.9977143
  0.4  3          0.8               1.00       150      0.9992857  0.9985714

Tuning parameter 'gamma' was held constant at a value of 0
Tuning parameter 'min_child_weight' was held constant at
 a value of 1
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were nrounds = 150, max_depth = 3, eta = 0.4, gamma = 0, colsample_bytree =
 0.8, min_child_weight = 1 and subsample = 0.75.
credit_xgb_pred <- predict(credit_xgb_model, test_data)
credit_xgb_cm <- confusionMatrix(credit_xgb_pred, test_data$Class)
credit_xgb_auc <- credit_xgb_cm$overall[1] *100
credit_xgb_acc <- credit_xgb_cm$overall[4] *100
credit_xgb_auc
Accuracy 
99.90667 
credit_xgb_model
eXtreme Gradient Boosting 

70000 samples
    8 predictor
    2 classes: 'No', 'Yes' 

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 63000, 63000, 63000, 63000, 63000, 63000, ... 
Resampling results across tuning parameters:

  eta  max_depth  colsample_bytree  subsample  nrounds  Accuracy   Kappa    
  0.3  1          0.6               0.50        50      0.9478286  0.8956571
  0.3  1          0.6               0.50       100      0.9522429  0.9044857
  0.3  1          0.6               0.50       150      0.9573714  0.9147429
  0.3  1          0.6               0.75        50      0.9480286  0.8960571
  0.3  1          0.6               0.75       100      0.9518286  0.9036571
  0.3  1          0.6               0.75       150      0.9557143  0.9114286
  0.3  1          0.6               1.00        50      0.9476714  0.8953429
  0.3  1          0.6               1.00       100      0.9516429  0.9032857
  0.3  1          0.6               1.00       150      0.9554000  0.9108000
  0.3  1          0.8               0.50        50      0.9477714  0.8955429
  0.3  1          0.8               0.50       100      0.9519429  0.9038857
  0.3  1          0.8               0.50       150      0.9569714  0.9139429
  0.3  1          0.8               0.75        50      0.9483000  0.8966000
  0.3  1          0.8               0.75       100      0.9521571  0.9043143
  0.3  1          0.8               0.75       150      0.9563000  0.9126000
  0.3  1          0.8               1.00        50      0.9476714  0.8953429
  0.3  1          0.8               1.00       100      0.9525714  0.9051429
  0.3  1          0.8               1.00       150      0.9552429  0.9104857
  0.3  2          0.6               0.50        50      0.9679571  0.9359143
  0.3  2          0.6               0.50       100      0.9910429  0.9820857
  0.3  2          0.6               0.50       150      0.9963429  0.9926857
  0.3  2          0.6               0.75        50      0.9694714  0.9389429
  0.3  2          0.6               0.75       100      0.9912286  0.9824571
  0.3  2          0.6               0.75       150      0.9964429  0.9928857
  0.3  2          0.6               1.00        50      0.9695429  0.9390857
  0.3  2          0.6               1.00       100      0.9920286  0.9840571
  0.3  2          0.6               1.00       150      0.9964143  0.9928286
  0.3  2          0.8               0.50        50      0.9711143  0.9422286
  0.3  2          0.8               0.50       100      0.9924429  0.9848857
  0.3  2          0.8               0.50       150      0.9967571  0.9935143
  0.3  2          0.8               0.75        50      0.9725143  0.9450286
  0.3  2          0.8               0.75       100      0.9926429  0.9852857
  0.3  2          0.8               0.75       150      0.9965429  0.9930857
  0.3  2          0.8               1.00        50      0.9710857  0.9421714
  0.3  2          0.8               1.00       100      0.9924714  0.9849429
  0.3  2          0.8               1.00       150      0.9965714  0.9931429
  0.3  3          0.6               0.50        50      0.9933714  0.9867429
  0.3  3          0.6               0.50       100      0.9979571  0.9959143
  0.3  3          0.6               0.50       150      0.9987857  0.9975714
  0.3  3          0.6               0.75        50      0.9936143  0.9872286
  0.3  3          0.6               0.75       100      0.9977857  0.9955714
  0.3  3          0.6               0.75       150      0.9988429  0.9976857
  0.3  3          0.6               1.00        50      0.9939857  0.9879714
  0.3  3          0.6               1.00       100      0.9979857  0.9959714
  0.3  3          0.6               1.00       150      0.9990571  0.9981143
  0.3  3          0.8               0.50        50      0.9945286  0.9890571
  0.3  3          0.8               0.50       100      0.9980143  0.9960286
  0.3  3          0.8               0.50       150      0.9990571  0.9981143
  0.3  3          0.8               0.75        50      0.9943714  0.9887429
  0.3  3          0.8               0.75       100      0.9983143  0.9966286
  0.3  3          0.8               0.75       150      0.9990286  0.9980571
  0.3  3          0.8               1.00        50      0.9940571  0.9881143
  0.3  3          0.8               1.00       100      0.9981000  0.9962000
  0.3  3          0.8               1.00       150      0.9989571  0.9979143
  0.4  1          0.6               0.50        50      0.9500571  0.9001143
  0.4  1          0.6               0.50       100      0.9559571  0.9119143
  0.4  1          0.6               0.50       150      0.9603857  0.9207714
  0.4  1          0.6               0.75        50      0.9496000  0.8992000
  0.4  1          0.6               0.75       100      0.9551571  0.9103143
  0.4  1          0.6               0.75       150      0.9605714  0.9211429
  0.4  1          0.6               1.00        50      0.9492000  0.8984000
  0.4  1          0.6               1.00       100      0.9538143  0.9076286
  0.4  1          0.6               1.00       150      0.9598429  0.9196857
  0.4  1          0.8               0.50        50      0.9501714  0.9003429
  0.4  1          0.8               0.50       100      0.9560143  0.9120286
  0.4  1          0.8               0.50       150      0.9613571  0.9227143
  0.4  1          0.8               0.75        50      0.9504000  0.9008000
  0.4  1          0.8               0.75       100      0.9561286  0.9122571
  0.4  1          0.8               0.75       150      0.9596143  0.9192286
  0.4  1          0.8               1.00        50      0.9490857  0.8981714
  0.4  1          0.8               1.00       100      0.9542714  0.9085429
  0.4  1          0.8               1.00       150      0.9597286  0.9194571
  0.4  2          0.6               0.50        50      0.9801857  0.9603714
  0.4  2          0.6               0.50       100      0.9955857  0.9911714
  0.4  2          0.6               0.50       150      0.9974857  0.9949714
  0.4  2          0.6               0.75        50      0.9816286  0.9632571
  0.4  2          0.6               0.75       100      0.9956000  0.9912000
  0.4  2          0.6               0.75       150      0.9974286  0.9948571
  0.4  2          0.6               1.00        50      0.9815143  0.9630286
  0.4  2          0.6               1.00       100      0.9956000  0.9912000
  0.4  2          0.6               1.00       150      0.9975714  0.9951429
  0.4  2          0.8               0.50        50      0.9822714  0.9645429
  0.4  2          0.8               0.50       100      0.9955857  0.9911714
  0.4  2          0.8               0.50       150      0.9973857  0.9947714
  0.4  2          0.8               0.75        50      0.9811286  0.9622571
  0.4  2          0.8               0.75       100      0.9956429  0.9912857
  0.4  2          0.8               0.75       150      0.9974429  0.9948857
  0.4  2          0.8               1.00        50      0.9809429  0.9618857
  0.4  2          0.8               1.00       100      0.9956571  0.9913143
  0.4  2          0.8               1.00       150      0.9975000  0.9950000
  0.4  3          0.6               0.50        50      0.9964000  0.9928000
  0.4  3          0.6               0.50       100      0.9985857  0.9971714
  0.4  3          0.6               0.50       150      0.9991857  0.9983714
  0.4  3          0.6               0.75        50      0.9965286  0.9930571
  0.4  3          0.6               0.75       100      0.9987857  0.9975714
  0.4  3          0.6               0.75       150      0.9992286  0.9984571
  0.4  3          0.6               1.00        50      0.9963286  0.9926571
  0.4  3          0.6               1.00       100      0.9985143  0.9970286
  0.4  3          0.6               1.00       150      0.9992429  0.9984857
  0.4  3          0.8               0.50        50      0.9969714  0.9939429
  0.4  3          0.8               0.50       100      0.9988143  0.9976286
  0.4  3          0.8               0.50       150      0.9991571  0.9983143
  0.4  3          0.8               0.75        50      0.9966286  0.9932571
  0.4  3          0.8               0.75       100      0.9987571  0.9975143
  0.4  3          0.8               0.75       150      0.9993571  0.9987143
  0.4  3          0.8               1.00        50      0.9964857  0.9929714
  0.4  3          0.8               1.00       100      0.9988571  0.9977143
  0.4  3          0.8               1.00       150      0.9992857  0.9985714

Tuning parameter 'gamma' was held constant at a value of 0
Tuning parameter 'min_child_weight' was held constant at
 a value of 1
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were nrounds = 150, max_depth = 3, eta = 0.4, gamma = 0, colsample_bytree =
 0.8, min_child_weight = 1 and subsample = 0.75.
credit_xgb_cm
Confusion Matrix and Statistics

          Reference
Prediction    No   Yes
       No  14972     0
       Yes    28 15000
                                          
               Accuracy : 0.9991          
                 95% CI : (0.9987, 0.9994)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.9981          
                                          
 Mcnemar's Test P-Value : 3.352e-07       
                                          
            Sensitivity : 0.9981          
            Specificity : 1.0000          
         Pos Pred Value : 1.0000          
         Neg Pred Value : 0.9981          
             Prevalence : 0.5000          
         Detection Rate : 0.4991          
   Detection Prevalence : 0.4991          
      Balanced Accuracy : 0.9991          
                                          
       'Positive' Class : No              
                                          
train_data$Class=as.numeric(train_data$Class)-1
test_data$Class=as.numeric(test_data$Class)-1
head(test_data)
head(train_data)
normalize <- function(x) {
  return ((x - min(x)) / (max(x) - min(x)))
}
scale.cols <- c("V3","V4","V7","V10","V12","V14","V16","V17")
train_data[scale.cols] <- lapply(train_data[scale.cols], normalize)
test_data[scale.cols] <- lapply(test_data[scale.cols], normalize)
train_data
test_data

#Neural Network Model

set.seed(1)
credit_train_idex1<- createDataPartition(train_data$Class, p = 0.9, list = FALSE)

credit_card_train = train_data[credit_train_idex1,]
credit_card_valid = train_data[-credit_train_idex1,]

credit_card_train_x <- credit_card_train[, -9]
credit_card_train_y <- credit_card_train[, 9]
credit_card_valid_x <- credit_card_valid[,-9]
credit_card_valid_y <- credit_card_valid[, 9]
credit_card_test_x <-test_data[, -9]
credit_card_test_y <- test_data[, 9]
library(keras)

Attaching package: ‘keras’

The following object is masked _by_ ‘.GlobalEnv’:

    normalize
set.seed(1)

model <- keras_model_sequential() %>%
  layer_dense(units = 64, activation = "relu", input_shape = as.matrix(dim(credit_card_train_x)[2])) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
Metal device set to: Apple M2

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB
model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_adam(learning_rate = 0.001),
  metrics = "accuracy"

  
)
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`.
WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
history <- model %>% fit(
  as.matrix(credit_card_train_x),
  credit_card_train_y,
  batch_size = 50,
  epochs = 20,
  verbose = 2,
  validation_data = list(as.matrix(credit_card_valid_x), credit_card_valid_y)
)
2023-05-09 19:17:03.803903: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
Epoch 1/20
1260/1260 - 6s - loss: 0.2170 - accuracy: 0.9220 - val_loss: 0.1770 - val_accuracy: 0.9316 - 6s/epoch - 5ms/step
Epoch 2/20
1260/1260 - 5s - loss: 0.1709 - accuracy: 0.9348 - val_loss: 0.1650 - val_accuracy: 0.9381 - 5s/epoch - 4ms/step
Epoch 3/20
1260/1260 - 5s - loss: 0.1657 - accuracy: 0.9376 - val_loss: 0.1649 - val_accuracy: 0.9393 - 5s/epoch - 4ms/step
Epoch 4/20
1260/1260 - 5s - loss: 0.1656 - accuracy: 0.9377 - val_loss: 0.1637 - val_accuracy: 0.9393 - 5s/epoch - 4ms/step
Epoch 5/20
1260/1260 - 5s - loss: 0.1646 - accuracy: 0.9380 - val_loss: 0.1753 - val_accuracy: 0.9359 - 5s/epoch - 4ms/step
Epoch 6/20
1260/1260 - 5s - loss: 0.1639 - accuracy: 0.9382 - val_loss: 0.1695 - val_accuracy: 0.9356 - 5s/epoch - 4ms/step
Epoch 7/20
1260/1260 - 5s - loss: 0.1622 - accuracy: 0.9384 - val_loss: 0.1618 - val_accuracy: 0.9413 - 5s/epoch - 4ms/step
Epoch 8/20
1260/1260 - 5s - loss: 0.1628 - accuracy: 0.9380 - val_loss: 0.1629 - val_accuracy: 0.9403 - 5s/epoch - 4ms/step
Epoch 9/20
1260/1260 - 5s - loss: 0.1618 - accuracy: 0.9383 - val_loss: 0.1599 - val_accuracy: 0.9407 - 5s/epoch - 4ms/step
Epoch 10/20
1260/1260 - 5s - loss: 0.1602 - accuracy: 0.9385 - val_loss: 0.1686 - val_accuracy: 0.9343 - 5s/epoch - 4ms/step
Epoch 11/20
1260/1260 - 5s - loss: 0.1597 - accuracy: 0.9390 - val_loss: 0.1583 - val_accuracy: 0.9419 - 5s/epoch - 4ms/step
Epoch 12/20
1260/1260 - 5s - loss: 0.1591 - accuracy: 0.9391 - val_loss: 0.1608 - val_accuracy: 0.9409 - 5s/epoch - 4ms/step
Epoch 13/20
1260/1260 - 5s - loss: 0.1582 - accuracy: 0.9398 - val_loss: 0.1560 - val_accuracy: 0.9420 - 5s/epoch - 4ms/step
Epoch 14/20
1260/1260 - 5s - loss: 0.1571 - accuracy: 0.9397 - val_loss: 0.1554 - val_accuracy: 0.9427 - 5s/epoch - 4ms/step
Epoch 15/20
1260/1260 - 5s - loss: 0.1575 - accuracy: 0.9395 - val_loss: 0.1575 - val_accuracy: 0.9401 - 5s/epoch - 4ms/step
Epoch 16/20
1260/1260 - 5s - loss: 0.1556 - accuracy: 0.9404 - val_loss: 0.1569 - val_accuracy: 0.9406 - 5s/epoch - 4ms/step
Epoch 17/20
1260/1260 - 5s - loss: 0.1555 - accuracy: 0.9403 - val_loss: 0.1561 - val_accuracy: 0.9420 - 5s/epoch - 4ms/step
Epoch 18/20
1260/1260 - 5s - loss: 0.1547 - accuracy: 0.9400 - val_loss: 0.1562 - val_accuracy: 0.9400 - 5s/epoch - 4ms/step
Epoch 19/20
1260/1260 - 5s - loss: 0.1540 - accuracy: 0.9411 - val_loss: 0.1515 - val_accuracy: 0.9420 - 5s/epoch - 4ms/step
Epoch 20/20
1260/1260 - 5s - loss: 0.1527 - accuracy: 0.9412 - val_loss: 0.1514 - val_accuracy: 0.9406 - 5s/epoch - 4ms/step
predicted_probs <- model %>% predict(as.matrix(credit_card_test_x))

  1/938 [..............................] - ETA: 46s
 46/938 [>.............................] - ETA: 0s 
 88/938 [=>............................] - ETA: 0s
133/938 [===>..........................] - ETA: 0s
179/938 [====>.........................] - ETA: 0s
227/938 [======>.......................] - ETA: 0s
275/938 [=======>......................] - ETA: 0s
322/938 [=========>....................] - ETA: 0s
371/938 [==========>...................] - ETA: 0s
419/938 [============>.................] - ETA: 0s
466/938 [=============>................] - ETA: 0s
513/938 [===============>..............] - ETA: 0s
559/938 [================>.............] - ETA: 0s
608/938 [==================>...........] - ETA: 0s
655/938 [===================>..........] - ETA: 0s
703/938 [=====================>........] - ETA: 0s
750/938 [======================>.......] - ETA: 0s
797/938 [========================>.....] - ETA: 0s
844/938 [=========================>....] - ETA: 0s
893/938 [===========================>..] - ETA: 0s
938/938 [==============================] - 1s 1ms/step

938/938 [==============================] - 1s 1ms/step
str(predicted_probs)
 num [1:30000, 1] 0.00629 0.99777 0.19117 1 0.01484 ...

ann_predictions= prediction(predicted_probs,credit_card_test_y)
ann_predictions
A prediction instance
  with 30000 data points
performance(ann_predictions, measure = "auc")@y.values
[[1]]
[1] 0.9790121
plot(history)

predicted.labels = factor(ifelse(predicted_probs>0.5, "1", "0"))
confusionMatrix(predicted.labels, as.factor(credit_card_test_y),mode="everything", positive="1")
Confusion Matrix and Statistics

          Reference
Prediction     0     1
         0 14922  2095
         1    78 12905
                                          
               Accuracy : 0.9276          
                 95% CI : (0.9246, 0.9305)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.8551          
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.8603          
            Specificity : 0.9948          
         Pos Pred Value : 0.9940          
         Neg Pred Value : 0.8769          
              Precision : 0.9940          
                 Recall : 0.8603          
                     F1 : 0.9223          
             Prevalence : 0.5000          
         Detection Rate : 0.4302          
   Detection Prevalence : 0.4328          
      Balanced Accuracy : 0.9276          
                                          
       'Positive' Class : 1               
                                          

The Accuracy of Neural Network Model is 90.7

model
Model: "sequential"
______________________________________________________________________________________________________________________
 Layer (type)                                        Output Shape                                   Param #           
======================================================================================================================
 dense_2 (Dense)                                     (None, 64)                                     576               
 dense_1 (Dense)                                     (None, 64)                                     4160              
 dense (Dense)                                       (None, 1)                                      65                
======================================================================================================================
Total params: 4,801
Trainable params: 4,801
Non-trainable params: 0
______________________________________________________________________________________________________________________

#Neural Network Model With Classweights.

library(keras)

model_weight <- keras_model_sequential() %>%
  layer_dense(units = 64, activation = "relu", input_shape = as.matrix(dim(credit_card_train_x)[2])) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")


model_weight %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_adam(learning_rate = 0.001),
  metrics = "accuracy"
)
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`.
WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
history_model <- model_weight %>% fit(
  as.matrix(credit_card_train_x),
  credit_card_train_y,
  batch_size = 50,
  epochs = 20,
  verbose = 2,
  validation_data = list(as.matrix(credit_card_valid_x), credit_card_valid_y),class_weight=list("0", "1")

)
Epoch 1/20
1260/1260 - 6s - loss: 0.2212 - accuracy: 0.9148 - val_loss: 0.1861 - val_accuracy: 0.9326 - 6s/epoch - 5ms/step
Epoch 2/20
1260/1260 - 5s - loss: 0.1701 - accuracy: 0.9353 - val_loss: 0.1773 - val_accuracy: 0.9363 - 5s/epoch - 4ms/step
Epoch 3/20
1260/1260 - 5s - loss: 0.1649 - accuracy: 0.9367 - val_loss: 0.1621 - val_accuracy: 0.9403 - 5s/epoch - 4ms/step
Epoch 4/20
1260/1260 - 5s - loss: 0.1629 - accuracy: 0.9380 - val_loss: 0.1625 - val_accuracy: 0.9411 - 5s/epoch - 4ms/step
Epoch 5/20
1260/1260 - 5s - loss: 0.1628 - accuracy: 0.9378 - val_loss: 0.1603 - val_accuracy: 0.9413 - 5s/epoch - 4ms/step
Epoch 6/20
1260/1260 - 5s - loss: 0.1614 - accuracy: 0.9382 - val_loss: 0.1591 - val_accuracy: 0.9409 - 5s/epoch - 4ms/step
Epoch 7/20
1260/1260 - 5s - loss: 0.1621 - accuracy: 0.9383 - val_loss: 0.1668 - val_accuracy: 0.9374 - 5s/epoch - 4ms/step
Epoch 8/20
1260/1260 - 5s - loss: 0.1606 - accuracy: 0.9391 - val_loss: 0.1600 - val_accuracy: 0.9401 - 5s/epoch - 4ms/step
Epoch 9/20
1260/1260 - 5s - loss: 0.1596 - accuracy: 0.9394 - val_loss: 0.1615 - val_accuracy: 0.9391 - 5s/epoch - 4ms/step
Epoch 10/20
1260/1260 - 5s - loss: 0.1596 - accuracy: 0.9392 - val_loss: 0.1598 - val_accuracy: 0.9403 - 5s/epoch - 4ms/step
Epoch 11/20
1260/1260 - 5s - loss: 0.1585 - accuracy: 0.9392 - val_loss: 0.1700 - val_accuracy: 0.9406 - 5s/epoch - 4ms/step
Epoch 12/20
1260/1260 - 5s - loss: 0.1570 - accuracy: 0.9394 - val_loss: 0.1573 - val_accuracy: 0.9381 - 5s/epoch - 4ms/step
Epoch 13/20
1260/1260 - 5s - loss: 0.1561 - accuracy: 0.9402 - val_loss: 0.1558 - val_accuracy: 0.9414 - 5s/epoch - 4ms/step
Epoch 14/20
1260/1260 - 5s - loss: 0.1568 - accuracy: 0.9399 - val_loss: 0.1552 - val_accuracy: 0.9414 - 5s/epoch - 4ms/step
Epoch 15/20
1260/1260 - 5s - loss: 0.1546 - accuracy: 0.9401 - val_loss: 0.1529 - val_accuracy: 0.9391 - 5s/epoch - 4ms/step
Epoch 16/20
1260/1260 - 5s - loss: 0.1534 - accuracy: 0.9406 - val_loss: 0.1512 - val_accuracy: 0.9416 - 5s/epoch - 4ms/step
Epoch 17/20
1260/1260 - 5s - loss: 0.1526 - accuracy: 0.9407 - val_loss: 0.1513 - val_accuracy: 0.9427 - 5s/epoch - 4ms/step
Epoch 18/20
1260/1260 - 5s - loss: 0.1508 - accuracy: 0.9415 - val_loss: 0.1502 - val_accuracy: 0.9423 - 5s/epoch - 4ms/step
Epoch 19/20
1260/1260 - 5s - loss: 0.1496 - accuracy: 0.9421 - val_loss: 0.1484 - val_accuracy: 0.9436 - 5s/epoch - 4ms/step
Epoch 20/20
1260/1260 - 5s - loss: 0.1492 - accuracy: 0.9419 - val_loss: 0.1554 - val_accuracy: 0.9389 - 5s/epoch - 4ms/step
plot(history_model)

predicted_probs_weight <- model_weight %>% predict(as.matrix(credit_card_test_x))

  1/938 [..............................] - ETA: 32s
 46/938 [>.............................] - ETA: 0s 
 93/938 [=>............................] - ETA: 0s
140/938 [===>..........................] - ETA: 0s
184/938 [====>.........................] - ETA: 0s
231/938 [======>.......................] - ETA: 0s
279/938 [=======>......................] - ETA: 0s
325/938 [=========>....................] - ETA: 0s
367/938 [==========>...................] - ETA: 0s
414/938 [============>.................] - ETA: 0s
463/938 [=============>................] - ETA: 0s
511/938 [===============>..............] - ETA: 0s
558/938 [================>.............] - ETA: 0s
606/938 [==================>...........] - ETA: 0s
654/938 [===================>..........] - ETA: 0s
694/938 [=====================>........] - ETA: 0s
739/938 [======================>.......] - ETA: 0s
785/938 [========================>.....] - ETA: 0s
832/938 [=========================>....] - ETA: 0s
878/938 [===========================>..] - ETA: 0s
925/938 [============================>.] - ETA: 0s
938/938 [==============================] - 1s 1ms/step

938/938 [==============================] - 1s 1ms/step
str(predicted_probs_weight)
 num [1:30000, 1] 0.00487 0.99236 0.3171 1 0.02441 ...
ann_predictions_weight= prediction(predicted_probs,credit_card_test_y)
ann_predictions_weight
A prediction instance
  with 30000 data points
performance(ann_predictions_weight, measure = "auc")@y.values
[[1]]
[1] 0.9790121
range(credit_card_test_x)
[1] 0 1
predicted.labels_weight = factor(ifelse(predicted_probs_weight>0.5, "1", "0"))
confusionMatrix(predicted.labels_weight, as.factor(credit_card_test_y),mode="everything", positive="1")
Confusion Matrix and Statistics

          Reference
Prediction     0     1
         0 14901  2106
         1    99 12894
                                          
               Accuracy : 0.9265          
                 95% CI : (0.9235, 0.9294)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.853           
                                          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.8596          
            Specificity : 0.9934          
         Pos Pred Value : 0.9924          
         Neg Pred Value : 0.8762          
              Precision : 0.9924          
                 Recall : 0.8596          
                     F1 : 0.9212          
             Prevalence : 0.5000          
         Detection Rate : 0.4298          
   Detection Prevalence : 0.4331          
      Balanced Accuracy : 0.9265          
                                          
       'Positive' Class : 1               
                                          

#Summary of Accuracy

Random Forest: 99.99%

DecisionTreeModel using C50: 99.97%

XGBoost: 99.93797%

KNN Smote: 99.38%

KNN: 99.38%

Gradient Boosted Tree model: 97.17%

SVM Radical: 97.04%

NeuralNetwork with class weight: 92.64%

DecisionTreeModel using rpart: 93.50195%

SVM Smote: 94.21%

SVM: 94.21%

Neural Network Model: 90.7%

---
title: "Credit Card Fraud Detection"
output: html_notebook
---

# PROJECT TOPIC -  CREDIT CARD FRAUD DETECTION

#1. Data Description:
Time- Number of seconds elapsed between this transaction and the first transaction in the dataset
V1-V28 - Result of a PCA Dimensionality reduction to protect user identities and sensitive features(v1-v28)
Amount - Transaction amount
Class - 1 for fraudulent transactions, 0 otherwise

#Goal

To identify fraudulent transactions  which can result in significant financial losses for individuals and businesses. The goal is to develop a machine learning model that can accurately classify transactions as either fraudulent or legitimate, based on various features such as transaction amount, location, and time.

#Dataset Reference:

The Credit Card Fraud Detection dataset on Kaggle at the following link https://www.kaggle.com/mlg-ulb/creditcardfraud  Links to an external site.
The dataset has 31 variables, with the target variable indicating whether a transaction is fraudulent or not.

#Model:

I will be using a classification model to solve the problem of credit card fraud detection. The outcome variable will be a binary variable indicating whether a credit card transaction is fraudulent or not.

```{r}
library(tidyverse)
library(caret)
library(ggplot2)
library(lattice)
library(gridExtra)
library(corrplot)
library(ROCR)
```


#1 Loading the dataset:
```{r}
credit_card <- read.csv("/Users/vinoth/Downloads/creditcard.csv")
credit_card$Class = factor(credit_card$Class,levels = c(0,1))
```

# Getting the summary of structure of dataset
```{r}
head(credit_card)
dim(credit_card)
str(credit_card)
summary(credit_card)
```
# Using duplicated function to duplicated elements.
```{r}
duplicated(credit_card)
```


#Number of missing values in realtor dataset.
```{r}
sum(is.na(credit_card))
```

There are no missing values in the dataset.


#Class Distribution: 

```{r}
table(credit_card$Class)
barplot(table(credit_card$Class))
```

#Time and Amount Analysis: 
```{r}
hist(credit_card$Time)
hist(credit_card$Amount)
```
```{r}
boxplot(Amount ~ Class, data = credit_card, xlab = "Class", ylab = "Amount")
plot(credit_card$V1, credit_card$V2, col = ifelse(credit_card$Class == 1, "red", "blue"),xlab = "V1", ylab = "V2")
```

#Finding the Correlation of every feature vs Class Feature.
```{r}
correlation <- cor(credit_card[,-31], as.numeric(credit_card[,31]))
correlation
```
The columns labeled as v1, v3, v10, v12, v14, v16, v17, and v18 exhibit a negative correlation, indicating a strong inverse relationship. Conversely, the columns v2, v4, and v11 demonstrate a positive correlation, indicating a direct relationship. The remaining columns do not display a significant correlation with the class variable, suggesting that they are not strongly associated with it.

```{r}
corrplot(correlation, method = "pie", cl.pos='n')
```

On the Above observation we can conclude that:

Variables with Strong Negative Correlations:
V10 (-0.2168829436)
V12 (-0.2605929249)
V14 (-0.3025436958)
V17 (-0.3264810672)
Variables with Moderate Negative Correlations:
V3 (-0.1929608271)
V7 (-0.1872565915)
V16 (-0.1965389403)
Variables with Moderate Positive Correlations:
V4 (0.1334474862)

#removing less correlated variables.
```{r}
credit_card <- select(credit_card, V3, V4, V7, V10, V12, V14, V16, V17, Class)
credit_card
```

```{r}
summary(credit_card)
```

#Box Plot, Density plot for all Feature vs Class

```{r}
boxplot_plots <- list()
density_plots <- list()

for (var in c("V3", "V4", "V7", "V10", "V12", "V14", "V16", "V17")) {

  boxplot_plot <- ggplot(credit_card, aes(x = factor(Class), y = .data[[var]], fill = factor(Class))) +
    geom_boxplot() +
    labs(title = paste("Boxplot of", var, "by Class")) +
    theme_minimal()

    boxplot_plots <- append(boxplot_plots, list(boxplot_plot))

  density_plot <- ggplot(credit_card, aes(x = .data[[var]], fill = factor(Class))) +
    geom_density(alpha = 0.7) +
    labs(title = paste("Density Plot of", var, "by Class")) +
    theme_minimal()
  
   density_plots <- append(density_plots, list(density_plot))
}

for (plot in boxplot_plots) {
  print(plot)
}

for (plot in density_plots) {
  print(plot)
}
```


```{r}
#t-test
for (var in c("V3", "V4", "V7", "V10", "V12", "V14", "V16", "V17")) {
  class_0 <- credit_card[[var]][credit_card$Class == 0]
  class_1 <- credit_card[[var]][credit_card$Class == 1]
  t_test <- t.test(class_0, class_1)
  cat(paste("Variable:", var, "\n"))
  cat(paste("t-test p-value:", t_test$p.value, "\n\n"))
}
```

#Under Sampling and Over Sampling of data

```{r}
credit_card_0 <- credit_card[credit_card$Class == 0, ]
credit_card_1 <- credit_card[credit_card$Class == 1, ]

#Undersampling
credit_card_0_down <- credit_card_0[sample(nrow(credit_card_0), 50000), ]

# Oversampling
credit_card_1_over <- credit_card_1[sample(nrow(credit_card_1), 50000, replace = TRUE), ]

#Combining
credit_card <- rbind(credit_card_0_down, credit_card_1_over)

credit_card
```

```{r}
dim(credit_card)
summary(credit_card)
```

#Radomizing the data before spliting
```{r}
credit_card <- credit_card[sample(nrow(credit_card)), ]
credit_card
```

#Spliting the data into training and testing sets
```{r}
levels(credit_card$Class)=list(No ="0", Yes="1")
set.seed(123)
train_indices <- createDataPartition(credit_card$Class, p = 0.7, list = FALSE)
train_data <- credit_card[train_indices, ]
test_data <- credit_card[-train_indices, ]

```

```{r}
# Check the dimensions of the train and test sets
dim(train_data)
dim(test_data)
str(train_data)
str(test_data)
```
# Decision tree using c50.
```{r}
library(C50)

model_boosted <- C5.0(Class ~ ., data = train_data, trials = 30)

pred_boosted <- predict(model_boosted, newdata = test_data, type = "class")

confusion_matrix_boosted <- table(test_data$Class, pred_boosted)

confusion_matrix_boosted

accuracy_boosted <- sum(diag(confusion_matrix_boosted)) / sum(confusion_matrix_boosted)

total_error_boosted <- 1 - accuracy_boosted

precision_boosted <- diag(confusion_matrix_boosted) / colSums(confusion_matrix_boosted)
recall_boosted <- diag(confusion_matrix_boosted) / rowSums(confusion_matrix_boosted)

precision_recall_boosted <- data.frame(Precision = precision_boosted, Recall = recall_boosted)

precision_recall_total_boosted <- rbind(precision_recall_boosted, c(mean(precision_boosted), mean(recall_boosted)))

precision_recall_total_boosted$Labels <- c(levels(test_data$Class), "Average")

print(confusion_matrix_boosted)
print(accuracy_boosted)
print(paste0("Total Error: ", round(total_error_boosted, 4)))
print(precision_recall_total_boosted)
```

#Desicion Tree using rpart
```{r}
credit_data_model <- train(Class ~ ., data = train_data, method = "rpart", trControl = trainControl(method = "cv", number = 5))
credit_data_pred <- predict(credit_data_model, test_data)
credit_data_cm <- confusionMatrix(credit_data_pred, test_data$Class)
credit_data_auc <- credit_data_cm$overall[1] *100
credit_data_auc
credit_data_acc <- credit_data_cm$overall[4] *100
credit_data_acc
cat("Accuracy of Decision Tree Model" ,credit_data_acc,". Summary of ROC Curve ",credit_data_auc)
credit_data_model
```
#Knn

```{r}
set.seed(1)
knn_credit_card<- train(Class ~ ., data = train_data, trControl = trainControl(method="cv", number=10, classProbs = TRUE, summaryFunction = twoClassSummary) ,method = "knn",metric ="ROC")
knn_credit_card
```

```{r}
knn_predictions_prob=predict(knn_credit_card, test_data, type="prob")
knn_predictions= prediction(knn_predictions_prob$Yes,test_data$Class)
performance(knn_predictions, measure = "auc")
knn_predicted_labels = predict(knn_credit_card, test_data)
confusionMatrix(knn_predicted_labels, test_data$Class ,positive="Yes", mode="everything")
```

```{r}
plot(knn_predictions_prob)
```

Accuracy : 0.9938

```{r}
set.seed(1)
knn_smote<- train(Class ~ ., data = train_data,trControl  =trainControl(method="cv", number=10, classProbs = TRUE, summaryFunction = twoClassSummary,sampling="smote") ,method = "knn",metric ="ROC")
knn_smote
```

```{r}
knn_smote_predictions_prob=predict(knn_smote, test_data, type="prob")
knn_smote_predictions= prediction(knn_smote_predictions_prob$Yes,test_data$Class)

performance(knn_smote_predictions, measure = "auc")
knn_smote_predicted_labels = predict(knn_smote, test_data)
confusionMatrix(knn_smote_predicted_labels, test_data$Class ,positive="Yes", mode="everything")
```

Accuracy : 0.9938


#SVM

```{r}
set.seed(1)

svm_credit<- train(Class ~ ., data = train_data,
             trControl = trainControl(method="cv", number=10, classProbs = TRUE, summaryFunction = twoClassSummary) ,method = "svmLinear",metric ="ROC")
svm_credit
```
```{r}
svm_predictions_prob=predict(svm_credit, test_data, type="prob")
svm_predictions= prediction(svm_predictions_prob$Yes,test_data$Class)
svm_predictions_prob

performance(svm_predictions, measure = "auc")
svm_predicted_labels = predict(svm_credit, test_data)
confusionMatrix(svm_predicted_labels, test_data$Class ,positive="Yes", mode="everything")

```
 Accuracy : 0.9421

#SVM Smote
```{r}
set.seed(1)
ctrl=trainControl(method="cv", number=10, classProbs = TRUE, summaryFunction = twoClassSummary, 
sampling="smote")
svm_smote<-train(Class ~ ., data = train_data, method = "svmLinear", verbose=FALSE, metric ="ROC", 
trControl= ctrl)
svm_smote

svm_smote_predictions_prob=predict(svm_smote, test_data, type="prob")
svm_smote_predictions= prediction(svm_smote_predictions_prob$Yes,test_data$Class)

performance(svm_smote_predictions, measure = "auc")
svm_smote_predicted_labels = predict(svm_smote, test_data)
confusionMatrix(svm_smote_predicted_labels, test_data$Class ,positive="Yes", mode="everything")
```


Accuracy : 0.9421


## SVM Radial

```{r}
set.seed(1)
credit_rsvm_model <- train(Class ~ ., data = train_data, method = "svmRadial", trControl = trainControl(method = "cv", number = 10))
credit_rsvm_model
credit_rsvm_pred <- predict(credit_rsvm_model, test_data)
credit_rsvm_cm <- confusionMatrix(credit_rsvm_pred, test_data$Class)
credit_rsvm_auc <- credit_rsvm_cm$overall[1] *100
credit_rsvm_acc <- credit_rsvm_cm$overall[4] *100
credit_rsvm_auc

```
Accuracy 96.85 

```{r}
credit_rsvm_model
credit_rsvm_cm
```


#RandomForest

```{r}
set.seed(1)
credit_rf_model <- train(Class ~ ., data = train_data, method = "rf",importance= T,metric = "ROC", trControl = trainControl(method = "cv", number = 10,  classProbs = TRUE))
credit_rf_model
```

```{r}
credit_rf_pred <- predict(credit_rf_model, test_data)
credit_rf_cm <- confusionMatrix(credit_rf_pred, test_data$Class)
credit_rf_auc <- credit_rf_cm$overall[1]
credit_rf_acc <- credit_rf_cm$overall[4]
credit_rf_auc
varImp(credit_rf_model)
```

```{r}
credit_rf_model
credit_rf_cm
```


#Gradient Boosted Tree model

```{r}
set.seed(1)
credit_gbm_model <- train(Class ~ ., data = train_data, method = "gbm", trControl  = trainControl(method = "cv", number = 10))
credit_gbm_predictions <- predict(credit_gbm_model,test_data)
credit_gbm_cm <- confusionMatrix(credit_gbm_predictions, test_data$Class)
credit_gbm_auc <- credit_gbm_cm$overall[1]
credit_gbm_auc
credit_gbm_acc <- credit_gbm_cm$overall[4]
credit_gbm_acc
```

Accuracy : 0.9698 
```{r}
credit_gbm_model
credit_gbm_cm
```

## XGBoost

```{r}
set.seed(1)
credit_xgb_model <- train(Class ~ ., data = train_data, method = "xgbTree", trControl = trainControl(method = "cv", number = 10),verbosity = 0)
credit_xgb_model
credit_xgb_pred <- predict(credit_xgb_model, test_data)
credit_xgb_cm <- confusionMatrix(credit_xgb_pred, test_data$Class)
credit_xgb_auc <- credit_xgb_cm$overall[1] *100
credit_xgb_acc <- credit_xgb_cm$overall[4] *100
credit_xgb_auc
```

```{r}
credit_xgb_model
credit_xgb_cm
```


```{r}
train_data$Class=as.numeric(train_data$Class)-1
test_data$Class=as.numeric(test_data$Class)-1
```


```{r}
head(test_data)
head(train_data)
```
```{r}
normalize <- function(x) {
  return ((x - min(x)) / (max(x) - min(x)))
}
scale.cols <- c("V3","V4","V7","V10","V12","V14","V16","V17")
train_data[scale.cols] <- lapply(train_data[scale.cols], normalize)
test_data[scale.cols] <- lapply(test_data[scale.cols], normalize)
train_data
test_data
```
#Neural Network Model

```{r}
set.seed(1)
credit_train_idex1<- createDataPartition(train_data$Class, p = 0.9, list = FALSE)

credit_card_train = train_data[credit_train_idex1,]
credit_card_valid = train_data[-credit_train_idex1,]

credit_card_train_x <- credit_card_train[, -9]
credit_card_train_y <- credit_card_train[, 9]
credit_card_valid_x <- credit_card_valid[,-9]
credit_card_valid_y <- credit_card_valid[, 9]
credit_card_test_x <-test_data[, -9]
credit_card_test_y <- test_data[, 9]
```



```{r}
library(keras)
set.seed(1)

model <- keras_model_sequential() %>%
  layer_dense(units = 64, activation = "relu", input_shape = as.matrix(dim(credit_card_train_x)[2])) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")


model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_adam(learning_rate = 0.001),
  metrics = "accuracy"

  
)

history <- model %>% fit(
  as.matrix(credit_card_train_x),
  credit_card_train_y,
  batch_size = 50,
  epochs = 20,
  verbose = 2,
  validation_data = list(as.matrix(credit_card_valid_x), credit_card_valid_y)
)

predicted_probs <- model %>% predict(as.matrix(credit_card_test_x))
str(predicted_probs)

```
```{r}

ann_predictions= prediction(predicted_probs,credit_card_test_y)
ann_predictions
performance(ann_predictions, measure = "auc")@y.values
```

```{r}
plot(history)
```

```{r}
predicted.labels = factor(ifelse(predicted_probs>0.5, "1", "0"))
confusionMatrix(predicted.labels, as.factor(credit_card_test_y),mode="everything", positive="1")

```
The Accuracy of Neural Network Model is 90.7

```{r}
model
```

#Neural Network Model With Classweights.
```{r}
library(keras)

model_weight <- keras_model_sequential() %>%
  layer_dense(units = 64, activation = "relu", input_shape = as.matrix(dim(credit_card_train_x)[2])) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")


model_weight %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_adam(learning_rate = 0.001),
  metrics = "accuracy"
)

history_model <- model_weight %>% fit(
  as.matrix(credit_card_train_x),
  credit_card_train_y,
  batch_size = 50,
  epochs = 20,
  verbose = 2,
  validation_data = list(as.matrix(credit_card_valid_x), credit_card_valid_y),class_weight=list("0", "1")

)
```

```{R}
plot(history_model)
```

```{R}
predicted_probs_weight <- model_weight %>% predict(as.matrix(credit_card_test_x))
str(predicted_probs_weight)

```


```{R}
ann_predictions_weight= prediction(predicted_probs,credit_card_test_y)
ann_predictions_weight
performance(ann_predictions_weight, measure = "auc")@y.values

```

```{R}
range(credit_card_test_x)
```

```{R}
predicted.labels_weight = factor(ifelse(predicted_probs_weight>0.5, "1", "0"))
confusionMatrix(predicted.labels_weight, as.factor(credit_card_test_y),mode="everything", positive="1")
```


#Summary of Accuracy

Random Forest: 99.99%

DecisionTreeModel using C50: 99.97%

XGBoost: 99.93797%

KNN Smote: 99.38%

KNN: 99.38%

Gradient Boosted Tree model: 97.17%

SVM Radical: 97.04%

NeuralNetwork with class weight: 92.64%

DecisionTreeModel using rpart: 93.50195%

SVM Smote: 94.21%

SVM: 94.21%

Neural Network Model: 90.7%


